From dc23a24fbe5d59429eacf45a9add82fdf6ab90e6 Mon Sep 17 00:00:00 2001 From: Cory Benfield Date: Wed, 22 May 2019 16:41:33 +0100 Subject: [PATCH] Update boringssl to ff62b38 (#108) --- Package.swift | 2 +- .../crypto/cipher_extra/e_aesgcmsiv.c | 5 + Sources/CNIOBoringSSL/crypto/dsa/dsa.c | 35 +- Sources/CNIOBoringSSL/crypto/engine/engine.c | 3 +- Sources/CNIOBoringSSL/crypto/err/err_data.c | 119 +- Sources/CNIOBoringSSL/crypto/evp/evp.c | 66 + Sources/CNIOBoringSSL/crypto/evp/internal.h | 5 + Sources/CNIOBoringSSL/crypto/evp/p_dsa_asn1.c | 5 + Sources/CNIOBoringSSL/crypto/evp/p_ec_asn1.c | 5 + Sources/CNIOBoringSSL/crypto/evp/p_ed25519.c | 38 +- .../CNIOBoringSSL/crypto/evp/p_ed25519_asn1.c | 111 +- Sources/CNIOBoringSSL/crypto/evp/p_rsa.c | 1 + Sources/CNIOBoringSSL/crypto/evp/p_rsa_asn1.c | 5 + .../CNIOBoringSSL/crypto/fipsmodule/aes/aes.c | 6 + .../crypto/fipsmodule/cipher/e_aes.c | 9 +- .../crypto/fipsmodule/digest/digest.c | 44 +- .../crypto/hrss/asm/poly_rq_mul.S | 2 + Sources/CNIOBoringSSL/crypto/mem.c | 5 +- Sources/CNIOBoringSSL/crypto/pkcs8/p5_pbev2.c | 37 +- Sources/CNIOBoringSSL/crypto/pkcs8/pkcs8.c | 4 + .../CNIOBoringSSL/crypto/pkcs8/pkcs8_x509.c | 18 +- .../sike/asm/fp-armv8.ios.aarch64.S | 1196 ++++++++++ .../sike/asm/fp-armv8.linux.aarch64.S | 1198 ++++++++++ .../sike/asm/fp-x86_64.linux.x86_64.S | 2089 +++++++++++++++++ .../sike/asm/fp-x86_64.mac.x86_64.S | 2088 ++++++++++++++++ Sources/CNIOBoringSSL/crypto/x509/x509_txt.c | 9 +- Sources/CNIOBoringSSL/crypto/x509/x509_vfy.c | 63 +- .../CNIOBoringSSL/crypto/x509v3/internal.h | 5 + Sources/CNIOBoringSSL/crypto/x509v3/v3_utl.c | 78 +- Sources/CNIOBoringSSL/hash.txt | 2 +- .../CNIOBoringSSL/include/CNIOBoringSSL/aes.h | 12 +- .../include/CNIOBoringSSL/base.h | 18 + .../include/CNIOBoringSSL/digest.h | 5 + .../CNIOBoringSSL/include/CNIOBoringSSL/dsa.h | 1 + .../include/CNIOBoringSSL/engine.h | 4 +- .../CNIOBoringSSL/include/CNIOBoringSSL/evp.h | 52 +- .../CNIOBoringSSL/include/CNIOBoringSSL/ssl.h | 60 
+- .../include/CNIOBoringSSL/x509_vfy.h | 2 + .../include/CNIOBoringSSL/x509v3.h | 6 +- .../include/boringssl_prefix_symbols.h | 41 +- .../include/boringssl_prefix_symbols_asm.h | 41 +- .../include/boringssl_prefix_symbols_nasm.inc | 82 +- Sources/CNIOBoringSSL/ssl/d1_both.cc | 2 +- Sources/CNIOBoringSSL/ssl/handoff.cc | 12 +- Sources/CNIOBoringSSL/ssl/handshake.cc | 1 + Sources/CNIOBoringSSL/ssl/handshake_server.cc | 10 +- Sources/CNIOBoringSSL/ssl/internal.h | 48 +- Sources/CNIOBoringSSL/ssl/s3_both.cc | 2 +- Sources/CNIOBoringSSL/ssl/s3_pkt.cc | 20 +- Sources/CNIOBoringSSL/ssl/ssl_lib.cc | 14 +- Sources/CNIOBoringSSL/ssl/ssl_x509.cc | 134 +- Sources/CNIOBoringSSL/ssl/t1_lib.cc | 75 +- Sources/CNIOBoringSSL/ssl/tls13_both.cc | 6 +- Sources/CNIOBoringSSL/ssl/tls13_client.cc | 1 + Sources/CNIOBoringSSL/ssl/tls13_server.cc | 89 +- Sources/CNIOBoringSSL/ssl/tls_method.cc | 32 +- Sources/CNIOBoringSSL/third_party/sike/P503.c | 100 + .../third_party/sike/asm/fp_generic.c | 181 ++ Sources/CNIOBoringSSL/third_party/sike/fpx.c | 305 +++ Sources/CNIOBoringSSL/third_party/sike/fpx.h | 112 + .../CNIOBoringSSL/third_party/sike/isogeny.c | 260 ++ .../CNIOBoringSSL/third_party/sike/isogeny.h | 49 + Sources/CNIOBoringSSL/third_party/sike/sike.c | 571 +++++ Sources/CNIOBoringSSL/third_party/sike/sike.h | 64 + .../CNIOBoringSSL/third_party/sike/utils.h | 143 ++ scripts/build-asm.py | 15 +- scripts/vendor-boringssl.sh | 6 + 67 files changed, 9438 insertions(+), 391 deletions(-) create mode 100644 Sources/CNIOBoringSSL/crypto/third_party/sike/asm/fp-armv8.ios.aarch64.S create mode 100644 Sources/CNIOBoringSSL/crypto/third_party/sike/asm/fp-armv8.linux.aarch64.S create mode 100644 Sources/CNIOBoringSSL/crypto/third_party/sike/asm/fp-x86_64.linux.x86_64.S create mode 100644 Sources/CNIOBoringSSL/crypto/third_party/sike/asm/fp-x86_64.mac.x86_64.S create mode 100644 Sources/CNIOBoringSSL/third_party/sike/P503.c create mode 100644 Sources/CNIOBoringSSL/third_party/sike/asm/fp_generic.c 
create mode 100644 Sources/CNIOBoringSSL/third_party/sike/fpx.c create mode 100644 Sources/CNIOBoringSSL/third_party/sike/fpx.h create mode 100644 Sources/CNIOBoringSSL/third_party/sike/isogeny.c create mode 100644 Sources/CNIOBoringSSL/third_party/sike/isogeny.h create mode 100644 Sources/CNIOBoringSSL/third_party/sike/sike.c create mode 100644 Sources/CNIOBoringSSL/third_party/sike/sike.h create mode 100644 Sources/CNIOBoringSSL/third_party/sike/utils.h diff --git a/Package.swift b/Package.swift index a711fb51..978260d9 100644 --- a/Package.swift +++ b/Package.swift @@ -22,7 +22,7 @@ import PackageDescription // Sources/CNIOBoringSSL directory. The source repository is at // https://boringssl.googlesource.com/boringssl. // -// BoringSSL Commit: ad9eee1628aa4dac2ac3528cb6bb5ddf27e73560 +// BoringSSL Commit: ff62b38b4b5a0e7926034b5f93d0c276e55b571d let package = Package( name: "swift-nio-ssl", diff --git a/Sources/CNIOBoringSSL/crypto/cipher_extra/e_aesgcmsiv.c b/Sources/CNIOBoringSSL/crypto/cipher_extra/e_aesgcmsiv.c index 49e6b0f8..d8302c68 100644 --- a/Sources/CNIOBoringSSL/crypto/cipher_extra/e_aesgcmsiv.c +++ b/Sources/CNIOBoringSSL/crypto/cipher_extra/e_aesgcmsiv.c @@ -426,6 +426,11 @@ static int aead_aes_gcm_siv_asm_open(const EVP_AEAD_CTX *ctx, uint8_t *out, return 0; } + if (nonce_len != EVP_AEAD_AES_GCM_SIV_NONCE_LEN) { + OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_UNSUPPORTED_NONCE_SIZE); + return 0; + } + const struct aead_aes_gcm_siv_asm_ctx *gcm_siv_ctx = asm_ctx_from_ctx(ctx); const size_t plaintext_len = in_len - EVP_AEAD_AES_GCM_SIV_TAG_LEN; const uint8_t *const given_tag = in + plaintext_len; diff --git a/Sources/CNIOBoringSSL/crypto/dsa/dsa.c b/Sources/CNIOBoringSSL/crypto/dsa/dsa.c index 6497a25b..492d65b5 100644 --- a/Sources/CNIOBoringSSL/crypto/dsa/dsa.c +++ b/Sources/CNIOBoringSSL/crypto/dsa/dsa.c @@ -558,29 +558,34 @@ static int mod_mul_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, } DSA_SIG *DSA_do_sign(const uint8_t *digest, size_t 
digest_len, const DSA *dsa) { - BIGNUM *kinv = NULL, *r = NULL, *s = NULL; - BIGNUM m; - BIGNUM xr; - BN_CTX *ctx = NULL; - int reason = ERR_R_BN_LIB; - DSA_SIG *ret = NULL; - - BN_init(&m); - BN_init(&xr); - if (!dsa->p || !dsa->q || !dsa->g) { - reason = DSA_R_MISSING_PARAMETERS; - goto err; + OPENSSL_PUT_ERROR(DSA, DSA_R_MISSING_PARAMETERS); + return NULL; + } + + // Reject invalid parameters. In particular, the algorithm will infinite loop + // if |g| is zero. + if (BN_is_zero(dsa->p) || BN_is_zero(dsa->q) || BN_is_zero(dsa->g)) { + OPENSSL_PUT_ERROR(DSA, DSA_R_INVALID_PARAMETERS); + return NULL; } // We only support DSA keys that are a multiple of 8 bits. (This is a weaker // check than the one in |DSA_do_check_signature|, which only allows 160-, // 224-, and 256-bit keys. if (BN_num_bits(dsa->q) % 8 != 0) { - reason = DSA_R_BAD_Q_VALUE; - goto err; + OPENSSL_PUT_ERROR(DSA, DSA_R_BAD_Q_VALUE); + return NULL; } + BIGNUM *kinv = NULL, *r = NULL, *s = NULL; + BIGNUM m; + BIGNUM xr; + BN_CTX *ctx = NULL; + DSA_SIG *ret = NULL; + + BN_init(&m); + BN_init(&xr); s = BN_new(); if (s == NULL) { goto err; @@ -640,7 +645,7 @@ DSA_SIG *DSA_do_sign(const uint8_t *digest, size_t digest_len, const DSA *dsa) { err: if (ret == NULL) { - OPENSSL_PUT_ERROR(DSA, reason); + OPENSSL_PUT_ERROR(DSA, ERR_R_BN_LIB); BN_free(r); BN_free(s); } diff --git a/Sources/CNIOBoringSSL/crypto/engine/engine.c b/Sources/CNIOBoringSSL/crypto/engine/engine.c index a9a971c4..287eb395 100644 --- a/Sources/CNIOBoringSSL/crypto/engine/engine.c +++ b/Sources/CNIOBoringSSL/crypto/engine/engine.c @@ -41,9 +41,10 @@ ENGINE *ENGINE_new(void) { return engine; } -void ENGINE_free(ENGINE *engine) { +int ENGINE_free(ENGINE *engine) { // Methods are currently required to be static so are not unref'ed. 
OPENSSL_free(engine); + return 1; } // set_method takes a pointer to a method and its given size and sets diff --git a/Sources/CNIOBoringSSL/crypto/err/err_data.c b/Sources/CNIOBoringSSL/crypto/err/err_data.c index 9ae81f45..49d664b7 100644 --- a/Sources/CNIOBoringSSL/crypto/err/err_data.c +++ b/Sources/CNIOBoringSSL/crypto/err/err_data.c @@ -79,7 +79,7 @@ const uint32_t kOpenSSLReasonValues[] = { 0x10331580, 0x10339599, 0x103415ac, - 0x10348f14, + 0x10348f27, 0x10350c60, 0x103595bf, 0x103615e9, @@ -120,7 +120,7 @@ const uint32_t kOpenSSLReasonValues[] = { 0x104798ad, 0x104818c2, 0x104898d0, - 0x10490e60, + 0x10490e73, 0x1049970a, 0x104a15d4, 0x14320c07, @@ -130,24 +130,24 @@ const uint32_t kOpenSSLReasonValues[] = { 0x143400ac, 0x143480ea, 0x18320083, - 0x18328f6a, + 0x18328f7d, 0x183300ac, - 0x18338f80, - 0x18340f94, + 0x18338f93, + 0x18340fa7, 0x183480ea, - 0x18350fa9, - 0x18358fc1, - 0x18360fd6, - 0x18368fea, - 0x1837100e, - 0x18379024, - 0x18381038, - 0x18389048, + 0x18350fbc, + 0x18358fd4, + 0x18360fe9, + 0x18368ffd, + 0x18371021, + 0x18379037, + 0x1838104b, + 0x1838905b, 0x18390a75, - 0x18399058, + 0x1839906b, 0x183a1080, 0x183a90a6, - 0x183b0c6c, + 0x183b0c7f, 0x183b90db, 0x183c10ed, 0x183c90f8, @@ -162,7 +162,7 @@ const uint32_t kOpenSSLReasonValues[] = { 0x184110c9, 0x18419094, 0x184210b3, - 0x1842906d, + 0x18428c6c, 0x203211d0, 0x203291bd, 0x243211dc, @@ -181,12 +181,13 @@ const uint32_t kOpenSSLReasonValues[] = { 0x24389293, 0x243912a6, 0x28320c54, - 0x28328c6c, + 0x28328c7f, 0x28330c24, - 0x28338c7f, + 0x28338c92, 0x28340c60, 0x283480ac, 0x283500ea, + 0x28358c6c, 0x2c322ec7, 0x2c3292bd, 0x2c332ed5, @@ -326,39 +327,39 @@ const uint32_t kOpenSSLReasonValues[] = { 0x34348bf1, 0x34350bd5, 0x3c320083, - 0x3c328ca9, - 0x3c330cc2, - 0x3c338cdd, - 0x3c340cfa, - 0x3c348d24, - 0x3c350d3f, - 0x3c358d65, - 0x3c360d7e, - 0x3c368d96, - 0x3c370da7, - 0x3c378db5, - 0x3c380dc2, - 0x3c388dd6, - 0x3c390c6c, - 0x3c398df9, - 0x3c3a0e0d, + 0x3c328cbc, + 0x3c330cd5, + 
0x3c338cf0, + 0x3c340d0d, + 0x3c348d37, + 0x3c350d52, + 0x3c358d78, + 0x3c360d91, + 0x3c368da9, + 0x3c370dba, + 0x3c378dc8, + 0x3c380dd5, + 0x3c388de9, + 0x3c390c7f, + 0x3c398e0c, + 0x3c3a0e20, 0x3c3a890f, - 0x3c3b0e1d, - 0x3c3b8e38, - 0x3c3c0e4a, - 0x3c3c8e7d, - 0x3c3d0e87, - 0x3c3d8e9b, - 0x3c3e0ea9, - 0x3c3e8ece, - 0x3c3f0c95, - 0x3c3f8eb7, + 0x3c3b0e30, + 0x3c3b8e4b, + 0x3c3c0e5d, + 0x3c3c8e90, + 0x3c3d0e9a, + 0x3c3d8eae, + 0x3c3e0ebc, + 0x3c3e8ee1, + 0x3c3f0ca8, + 0x3c3f8eca, 0x3c4000ac, 0x3c4080ea, - 0x3c410d15, - 0x3c418d54, - 0x3c420e60, - 0x3c428dea, + 0x3c410d28, + 0x3c418d67, + 0x3c420e73, + 0x3c428dfd, 0x40321946, 0x4032995c, 0x4033198a, @@ -373,7 +374,7 @@ const uint32_t kOpenSSLReasonValues[] = { 0x40379a2b, 0x40381a36, 0x40389a48, - 0x40390f14, + 0x40390f27, 0x40399a58, 0x403a1a6b, 0x403a9a8c, @@ -710,17 +711,17 @@ const uint32_t kOpenSSLReasonValues[] = { 0x505035f2, 0x505086f1, 0x50513605, - 0x58320f52, - 0x68320f14, - 0x68328c6c, - 0x68330c7f, - 0x68338f22, - 0x68340f32, + 0x58320f65, + 0x68320f27, + 0x68328c7f, + 0x68330c92, + 0x68338f35, + 0x68340f45, 0x683480ea, - 0x6c320eda, + 0x6c320eed, 0x6c328c36, - 0x6c330ee5, - 0x6c338efe, + 0x6c330ef8, + 0x6c338f11, 0x74320a1b, 0x743280ac, 0x74330c47, @@ -924,6 +925,7 @@ const char kOpenSSLReasonStringData[] = "UNKNOWN_HASH\0" "BAD_Q_VALUE\0" "BAD_VERSION\0" + "INVALID_PARAMETERS\0" "MISSING_PARAMETERS\0" "NEED_NEW_SETUP_VALUES\0" "BIGNUM_OUT_OF_RANGE\0" @@ -974,7 +976,6 @@ const char kOpenSSLReasonStringData[] = "INVALID_KEYBITS\0" "INVALID_MGF1_MD\0" "INVALID_PADDING_MODE\0" - "INVALID_PARAMETERS\0" "INVALID_PSS_SALTLEN\0" "INVALID_SIGNATURE\0" "KEYS_NOT_SET\0" diff --git a/Sources/CNIOBoringSSL/crypto/evp/evp.c b/Sources/CNIOBoringSSL/crypto/evp/evp.c index 691f2a98..ffd5b58b 100644 --- a/Sources/CNIOBoringSSL/crypto/evp/evp.c +++ b/Sources/CNIOBoringSSL/crypto/evp/evp.c @@ -330,7 +330,73 @@ int EVP_PKEY_set_type(EVP_PKEY *pkey, int type) { return 1; } +EVP_PKEY *EVP_PKEY_new_raw_private_key(int type, 
ENGINE *unused, + const uint8_t *in, size_t len) { + EVP_PKEY *ret = EVP_PKEY_new(); + if (ret == NULL || + !EVP_PKEY_set_type(ret, type)) { + goto err; + } + + if (ret->ameth->set_priv_raw == NULL) { + OPENSSL_PUT_ERROR(EVP, EVP_R_OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE); + goto err; + } + + if (!ret->ameth->set_priv_raw(ret, in, len)) { + goto err; + } + + return ret; + +err: + EVP_PKEY_free(ret); + return NULL; +} + +EVP_PKEY *EVP_PKEY_new_raw_public_key(int type, ENGINE *unused, + const uint8_t *in, size_t len) { + EVP_PKEY *ret = EVP_PKEY_new(); + if (ret == NULL || + !EVP_PKEY_set_type(ret, type)) { + goto err; + } + + if (ret->ameth->set_pub_raw == NULL) { + OPENSSL_PUT_ERROR(EVP, EVP_R_OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE); + goto err; + } + + if (!ret->ameth->set_pub_raw(ret, in, len)) { + goto err; + } + + return ret; + +err: + EVP_PKEY_free(ret); + return NULL; +} +int EVP_PKEY_get_raw_private_key(const EVP_PKEY *pkey, uint8_t *out, + size_t *out_len) { + if (pkey->ameth->get_priv_raw == NULL) { + OPENSSL_PUT_ERROR(EVP, EVP_R_OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE); + return 0; + } + + return pkey->ameth->get_priv_raw(pkey, out, out_len); +} + +int EVP_PKEY_get_raw_public_key(const EVP_PKEY *pkey, uint8_t *out, + size_t *out_len) { + if (pkey->ameth->get_pub_raw == NULL) { + OPENSSL_PUT_ERROR(EVP, EVP_R_OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE); + return 0; + } + + return pkey->ameth->get_pub_raw(pkey, out, out_len); +} int EVP_PKEY_cmp_parameters(const EVP_PKEY *a, const EVP_PKEY *b) { if (a->type != b->type) { diff --git a/Sources/CNIOBoringSSL/crypto/evp/internal.h b/Sources/CNIOBoringSSL/crypto/evp/internal.h index e8aa9c45..7628a541 100644 --- a/Sources/CNIOBoringSSL/crypto/evp/internal.h +++ b/Sources/CNIOBoringSSL/crypto/evp/internal.h @@ -96,6 +96,11 @@ struct evp_pkey_asn1_method_st { // |out|. It returns one on success and zero on error. 
int (*priv_encode)(CBB *out, const EVP_PKEY *key); + int (*set_priv_raw)(EVP_PKEY *pkey, const uint8_t *in, size_t len); + int (*set_pub_raw)(EVP_PKEY *pkey, const uint8_t *in, size_t len); + int (*get_priv_raw)(const EVP_PKEY *pkey, uint8_t *out, size_t *out_len); + int (*get_pub_raw)(const EVP_PKEY *pkey, uint8_t *out, size_t *out_len); + // pkey_opaque returns 1 if the |pk| is opaque. Opaque keys are backed by // custom implementations which do not expose key material and parameters. int (*pkey_opaque)(const EVP_PKEY *pk); diff --git a/Sources/CNIOBoringSSL/crypto/evp/p_dsa_asn1.c b/Sources/CNIOBoringSSL/crypto/evp/p_dsa_asn1.c index 7ce02af6..a33b51d4 100644 --- a/Sources/CNIOBoringSSL/crypto/evp/p_dsa_asn1.c +++ b/Sources/CNIOBoringSSL/crypto/evp/p_dsa_asn1.c @@ -255,6 +255,11 @@ const EVP_PKEY_ASN1_METHOD dsa_asn1_meth = { dsa_priv_decode, dsa_priv_encode, + NULL /* set_priv_raw */, + NULL /* set_pub_raw */, + NULL /* get_priv_raw */, + NULL /* get_pub_raw */, + NULL /* pkey_opaque */, int_dsa_size, diff --git a/Sources/CNIOBoringSSL/crypto/evp/p_ec_asn1.c b/Sources/CNIOBoringSSL/crypto/evp/p_ec_asn1.c index 846c201c..f1123524 100644 --- a/Sources/CNIOBoringSSL/crypto/evp/p_ec_asn1.c +++ b/Sources/CNIOBoringSSL/crypto/evp/p_ec_asn1.c @@ -237,6 +237,11 @@ const EVP_PKEY_ASN1_METHOD ec_asn1_meth = { eckey_priv_decode, eckey_priv_encode, + NULL /* set_priv_raw */, + NULL /* set_pub_raw */, + NULL /* get_priv_raw */, + NULL /* get_pub_raw */, + eckey_opaque, int_ec_size, diff --git a/Sources/CNIOBoringSSL/crypto/evp/p_ed25519.c b/Sources/CNIOBoringSSL/crypto/evp/p_ed25519.c index 1aa25fb7..70f3af6a 100644 --- a/Sources/CNIOBoringSSL/crypto/evp/p_ed25519.c +++ b/Sources/CNIOBoringSSL/crypto/evp/p_ed25519.c @@ -16,6 +16,7 @@ #include #include +#include #include "internal.h" @@ -23,6 +24,27 @@ // Ed25519 has no parameters to copy. 
static int pkey_ed25519_copy(EVP_PKEY_CTX *dst, EVP_PKEY_CTX *src) { return 1; } +static int pkey_ed25519_keygen(EVP_PKEY_CTX *ctx, EVP_PKEY *pkey) { + ED25519_KEY *key = OPENSSL_malloc(sizeof(ED25519_KEY)); + if (key == NULL) { + OPENSSL_PUT_ERROR(EVP, ERR_R_MALLOC_FAILURE); + return 0; + } + + if (!EVP_PKEY_set_type(pkey, EVP_PKEY_ED25519)) { + OPENSSL_free(key); + return 0; + } + + uint8_t pubkey_unused[32]; + ED25519_keypair(pubkey_unused, key->key.priv); + key->has_private = 1; + + OPENSSL_free(pkey->pkey.ptr); + pkey->pkey.ptr = key; + return 1; +} + static int pkey_ed25519_sign_message(EVP_PKEY_CTX *ctx, uint8_t *sig, size_t *siglen, const uint8_t *tbs, size_t tbslen) { @@ -32,12 +54,22 @@ static int pkey_ed25519_sign_message(EVP_PKEY_CTX *ctx, uint8_t *sig, return 0; } - *siglen = 64; if (sig == NULL) { + *siglen = 64; return 1; } - return ED25519_sign(sig, tbs, tbslen, key->key.priv); + if (*siglen < 64) { + OPENSSL_PUT_ERROR(EVP, EVP_R_BUFFER_TOO_SMALL); + return 0; + } + + if (!ED25519_sign(sig, tbs, tbslen, key->key.priv)) { + return 0; + } + + *siglen = 64; + return 1; } static int pkey_ed25519_verify_message(EVP_PKEY_CTX *ctx, const uint8_t *sig, @@ -58,7 +90,7 @@ const EVP_PKEY_METHOD ed25519_pkey_meth = { NULL /* init */, pkey_ed25519_copy, NULL /* cleanup */, - NULL /* keygen */, + pkey_ed25519_keygen, NULL /* sign */, pkey_ed25519_sign_message, NULL /* verify */, diff --git a/Sources/CNIOBoringSSL/crypto/evp/p_ed25519_asn1.c b/Sources/CNIOBoringSSL/crypto/evp/p_ed25519_asn1.c index 43f5efe3..5862be74 100644 --- a/Sources/CNIOBoringSSL/crypto/evp/p_ed25519_asn1.c +++ b/Sources/CNIOBoringSSL/crypto/evp/p_ed25519_asn1.c @@ -28,45 +28,101 @@ static void ed25519_free(EVP_PKEY *pkey) { pkey->pkey.ptr = NULL; } -static int set_pubkey(EVP_PKEY *pkey, const uint8_t pubkey[32]) { +static int ed25519_set_priv_raw(EVP_PKEY *pkey, const uint8_t *in, size_t len) { + if (len != 32) { + OPENSSL_PUT_ERROR(EVP, EVP_R_DECODE_ERROR); + return 0; + } + ED25519_KEY 
*key = OPENSSL_malloc(sizeof(ED25519_KEY)); if (key == NULL) { OPENSSL_PUT_ERROR(EVP, ERR_R_MALLOC_FAILURE); return 0; } - key->has_private = 0; - OPENSSL_memcpy(key->key.pub.value, pubkey, 32); + + // The RFC 8032 encoding stores only the 32-byte seed, so we must recover the + // full representation which we use from it. + uint8_t pubkey_unused[32]; + ED25519_keypair_from_seed(pubkey_unused, key->key.priv, in); + key->has_private = 1; ed25519_free(pkey); pkey->pkey.ptr = key; return 1; } -static int set_privkey(EVP_PKEY *pkey, const uint8_t privkey[64]) { +static int ed25519_set_pub_raw(EVP_PKEY *pkey, const uint8_t *in, size_t len) { + if (len != 32) { + OPENSSL_PUT_ERROR(EVP, EVP_R_DECODE_ERROR); + return 0; + } + ED25519_KEY *key = OPENSSL_malloc(sizeof(ED25519_KEY)); if (key == NULL) { OPENSSL_PUT_ERROR(EVP, ERR_R_MALLOC_FAILURE); return 0; } - key->has_private = 1; - OPENSSL_memcpy(key->key.priv, privkey, 64); + + OPENSSL_memcpy(key->key.pub.value, in, 32); + key->has_private = 0; ed25519_free(pkey); pkey->pkey.ptr = key; return 1; } +static int ed25519_get_priv_raw(const EVP_PKEY *pkey, uint8_t *out, + size_t *out_len) { + const ED25519_KEY *key = pkey->pkey.ptr; + if (!key->has_private) { + OPENSSL_PUT_ERROR(EVP, EVP_R_NOT_A_PRIVATE_KEY); + return 0; + } + + if (out == NULL) { + *out_len = 32; + return 1; + } + + if (*out_len < 32) { + OPENSSL_PUT_ERROR(EVP, EVP_R_BUFFER_TOO_SMALL); + return 0; + } + + // The raw private key format is the first 32 bytes of the private key. 
+ OPENSSL_memcpy(out, key->key.priv, 32); + *out_len = 32; + return 1; +} + +static int ed25519_get_pub_raw(const EVP_PKEY *pkey, uint8_t *out, + size_t *out_len) { + const ED25519_KEY *key = pkey->pkey.ptr; + if (out == NULL) { + *out_len = 32; + return 1; + } + + if (*out_len < 32) { + OPENSSL_PUT_ERROR(EVP, EVP_R_BUFFER_TOO_SMALL); + return 0; + } + + OPENSSL_memcpy(out, key->key.pub.value, 32); + *out_len = 32; + return 1; +} + static int ed25519_pub_decode(EVP_PKEY *out, CBS *params, CBS *key) { // See RFC 8410, section 4. // The parameters must be omitted. Public keys have length 32. - if (CBS_len(params) != 0 || - CBS_len(key) != 32) { + if (CBS_len(params) != 0) { OPENSSL_PUT_ERROR(EVP, EVP_R_DECODE_ERROR); return 0; } - return set_pubkey(out, CBS_data(key)); + return ed25519_set_pub_raw(out, CBS_data(key), CBS_len(key)); } static int ed25519_pub_encode(CBB *out, const EVP_PKEY *pkey) { @@ -103,17 +159,12 @@ static int ed25519_priv_decode(EVP_PKEY *out, CBS *params, CBS *key) { CBS inner; if (CBS_len(params) != 0 || !CBS_get_asn1(key, &inner, CBS_ASN1_OCTETSTRING) || - CBS_len(key) != 0 || - CBS_len(&inner) != 32) { + CBS_len(key) != 0) { OPENSSL_PUT_ERROR(EVP, EVP_R_DECODE_ERROR); return 0; } - // The PKCS#8 encoding stores only the 32-byte seed, so we must recover the - // full representation which we use from it. 
- uint8_t pubkey[32], privkey[64]; - ED25519_keypair_from_seed(pubkey, privkey, CBS_data(&inner)); - return set_privkey(out, privkey); + return ed25519_set_priv_raw(out, CBS_data(&inner), CBS_len(&inner)); } static int ed25519_priv_encode(CBB *out, const EVP_PKEY *pkey) { @@ -156,6 +207,10 @@ const EVP_PKEY_ASN1_METHOD ed25519_asn1_meth = { ed25519_pub_cmp, ed25519_priv_decode, ed25519_priv_encode, + ed25519_set_priv_raw, + ed25519_set_pub_raw, + ed25519_get_priv_raw, + ed25519_get_pub_raw, NULL /* pkey_opaque */, ed25519_size, ed25519_bits, @@ -164,27 +219,3 @@ const EVP_PKEY_ASN1_METHOD ed25519_asn1_meth = { NULL /* param_cmp */, ed25519_free, }; - -EVP_PKEY *EVP_PKEY_new_ed25519_public(const uint8_t public_key[32]) { - EVP_PKEY *ret = EVP_PKEY_new(); - if (ret == NULL || - !EVP_PKEY_set_type(ret, EVP_PKEY_ED25519) || - !set_pubkey(ret, public_key)) { - EVP_PKEY_free(ret); - return NULL; - } - - return ret; -} - -EVP_PKEY *EVP_PKEY_new_ed25519_private(const uint8_t private_key[64]) { - EVP_PKEY *ret = EVP_PKEY_new(); - if (ret == NULL || - !EVP_PKEY_set_type(ret, EVP_PKEY_ED25519) || - !set_privkey(ret, private_key)) { - EVP_PKEY_free(ret); - return NULL; - } - - return ret; -} diff --git a/Sources/CNIOBoringSSL/crypto/evp/p_rsa.c b/Sources/CNIOBoringSSL/crypto/evp/p_rsa.c index d4228952..3ad44e0f 100644 --- a/Sources/CNIOBoringSSL/crypto/evp/p_rsa.c +++ b/Sources/CNIOBoringSSL/crypto/evp/p_rsa.c @@ -132,6 +132,7 @@ static int pkey_rsa_copy(EVP_PKEY_CTX *dst, EVP_PKEY_CTX *src) { dctx->pad_mode = sctx->pad_mode; dctx->md = sctx->md; dctx->mgf1md = sctx->mgf1md; + dctx->saltlen = sctx->saltlen; if (sctx->oaep_label) { OPENSSL_free(dctx->oaep_label); dctx->oaep_label = BUF_memdup(sctx->oaep_label, sctx->oaep_labellen); diff --git a/Sources/CNIOBoringSSL/crypto/evp/p_rsa_asn1.c b/Sources/CNIOBoringSSL/crypto/evp/p_rsa_asn1.c index 70deff83..9f977d63 100644 --- a/Sources/CNIOBoringSSL/crypto/evp/p_rsa_asn1.c +++ b/Sources/CNIOBoringSSL/crypto/evp/p_rsa_asn1.c @@ 
-178,6 +178,11 @@ const EVP_PKEY_ASN1_METHOD rsa_asn1_meth = { rsa_priv_decode, rsa_priv_encode, + NULL /* set_priv_raw */, + NULL /* set_pub_raw */, + NULL /* get_priv_raw */, + NULL /* get_pub_raw */, + rsa_opaque, int_rsa_size, diff --git a/Sources/CNIOBoringSSL/crypto/fipsmodule/aes/aes.c b/Sources/CNIOBoringSSL/crypto/fipsmodule/aes/aes.c index d70fdef9..1dcdf071 100644 --- a/Sources/CNIOBoringSSL/crypto/fipsmodule/aes/aes.c +++ b/Sources/CNIOBoringSSL/crypto/fipsmodule/aes/aes.c @@ -834,6 +834,9 @@ void AES_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) { } int AES_set_encrypt_key(const uint8_t *key, unsigned bits, AES_KEY *aeskey) { + if (bits != 128 && bits != 192 && bits != 256) { + return -2; + } if (hwaes_capable()) { return aes_hw_set_encrypt_key(key, bits, aeskey); } else if (vpaes_capable()) { @@ -844,6 +847,9 @@ int AES_set_encrypt_key(const uint8_t *key, unsigned bits, AES_KEY *aeskey) { } int AES_set_decrypt_key(const uint8_t *key, unsigned bits, AES_KEY *aeskey) { + if (bits != 128 && bits != 192 && bits != 256) { + return -2; + } if (hwaes_capable()) { return aes_hw_set_decrypt_key(key, bits, aeskey); } else if (vpaes_capable()) { diff --git a/Sources/CNIOBoringSSL/crypto/fipsmodule/cipher/e_aes.c b/Sources/CNIOBoringSSL/crypto/fipsmodule/cipher/e_aes.c index df9166bf..3d26cc07 100644 --- a/Sources/CNIOBoringSSL/crypto/fipsmodule/cipher/e_aes.c +++ b/Sources/CNIOBoringSSL/crypto/fipsmodule/cipher/e_aes.c @@ -456,6 +456,9 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr) { case EVP_CTRL_COPY: { EVP_CIPHER_CTX *out = ptr; EVP_AES_GCM_CTX *gctx_out = aes_gcm_from_cipher_ctx(out); + // |EVP_CIPHER_CTX_copy| copies this generically, but we must redo it in + // case |out->cipher_data| and |in->cipher_data| are differently aligned. 
+ OPENSSL_memcpy(gctx_out, gctx, sizeof(EVP_AES_GCM_CTX)); if (gctx->iv == c->iv) { gctx_out->iv = out->iv; } else { @@ -590,7 +593,7 @@ DEFINE_LOCAL_DATA(EVP_CIPHER, aes_128_gcm_generic) { out->key_len = 16; out->iv_len = 12; out->ctx_size = sizeof(EVP_AES_GCM_CTX) + EVP_AES_GCM_CTX_PADDING; - out->flags = EVP_CIPH_GCM_MODE | EVP_CIPH_CUSTOM_IV | + out->flags = EVP_CIPH_GCM_MODE | EVP_CIPH_CUSTOM_IV | EVP_CIPH_CUSTOM_COPY | EVP_CIPH_FLAG_CUSTOM_CIPHER | EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_CTRL_INIT | EVP_CIPH_FLAG_AEAD_CIPHER; out->init = aes_gcm_init_key; @@ -658,7 +661,7 @@ DEFINE_LOCAL_DATA(EVP_CIPHER, aes_192_gcm_generic) { out->key_len = 24; out->iv_len = 12; out->ctx_size = sizeof(EVP_AES_GCM_CTX) + EVP_AES_GCM_CTX_PADDING; - out->flags = EVP_CIPH_GCM_MODE | EVP_CIPH_CUSTOM_IV | + out->flags = EVP_CIPH_GCM_MODE | EVP_CIPH_CUSTOM_IV | EVP_CIPH_CUSTOM_COPY | EVP_CIPH_FLAG_CUSTOM_CIPHER | EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_CTRL_INIT | EVP_CIPH_FLAG_AEAD_CIPHER; out->init = aes_gcm_init_key; @@ -726,7 +729,7 @@ DEFINE_LOCAL_DATA(EVP_CIPHER, aes_256_gcm_generic) { out->key_len = 32; out->iv_len = 12; out->ctx_size = sizeof(EVP_AES_GCM_CTX) + EVP_AES_GCM_CTX_PADDING; - out->flags = EVP_CIPH_GCM_MODE | EVP_CIPH_CUSTOM_IV | + out->flags = EVP_CIPH_GCM_MODE | EVP_CIPH_CUSTOM_IV | EVP_CIPH_CUSTOM_COPY | EVP_CIPH_FLAG_CUSTOM_CIPHER | EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_CTRL_INIT | EVP_CIPH_FLAG_AEAD_CIPHER; out->init = aes_gcm_init_key; diff --git a/Sources/CNIOBoringSSL/crypto/fipsmodule/digest/digest.c b/Sources/CNIOBoringSSL/crypto/fipsmodule/digest/digest.c index 57454124..7a861c1a 100644 --- a/Sources/CNIOBoringSSL/crypto/fipsmodule/digest/digest.c +++ b/Sources/CNIOBoringSSL/crypto/fipsmodule/digest/digest.c @@ -115,8 +115,15 @@ void EVP_MD_CTX_free(EVP_MD_CTX *ctx) { void EVP_MD_CTX_destroy(EVP_MD_CTX *ctx) { EVP_MD_CTX_free(ctx); } +int EVP_DigestFinalXOF(EVP_MD_CTX *ctx, uint8_t *out, size_t len) { + OPENSSL_PUT_ERROR(DIGEST, 
ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); + return 0; +} + int EVP_MD_CTX_copy_ex(EVP_MD_CTX *out, const EVP_MD_CTX *in) { - if (in == NULL || in->digest == NULL) { + // |in->digest| may be NULL if this is a signing |EVP_MD_CTX| for, e.g., + // Ed25519 which does not hash with |EVP_MD_CTX|. + if (in == NULL || (in->pctx == NULL && in->digest == NULL)) { OPENSSL_PUT_ERROR(DIGEST, DIGEST_R_INPUT_NOT_INITIALIZED); return 0; } @@ -131,29 +138,34 @@ int EVP_MD_CTX_copy_ex(EVP_MD_CTX *out, const EVP_MD_CTX *in) { } } - uint8_t *tmp_buf; - if (out->digest != in->digest) { - assert(in->digest->ctx_size != 0); - tmp_buf = OPENSSL_malloc(in->digest->ctx_size); - if (tmp_buf == NULL) { - if (pctx) { - in->pctx_ops->free(pctx); + uint8_t *tmp_buf = NULL; + if (in->digest != NULL) { + if (out->digest != in->digest) { + assert(in->digest->ctx_size != 0); + tmp_buf = OPENSSL_malloc(in->digest->ctx_size); + if (tmp_buf == NULL) { + if (pctx) { + in->pctx_ops->free(pctx); + } + OPENSSL_PUT_ERROR(DIGEST, ERR_R_MALLOC_FAILURE); + return 0; } - OPENSSL_PUT_ERROR(DIGEST, ERR_R_MALLOC_FAILURE); - return 0; + } else { + // |md_data| will be the correct size in this case. It's removed from + // |out| so that |EVP_MD_CTX_cleanup| doesn't free it, and then it's + // reused. + tmp_buf = out->md_data; + out->md_data = NULL; } - } else { - // |md_data| will be the correct size in this case. It's removed from |out| - // so that |EVP_MD_CTX_cleanup| doesn't free it, and then it's reused. 
- tmp_buf = out->md_data; - out->md_data = NULL; } EVP_MD_CTX_cleanup(out); out->digest = in->digest; out->md_data = tmp_buf; - OPENSSL_memcpy(out->md_data, in->md_data, in->digest->ctx_size); + if (in->digest != NULL) { + OPENSSL_memcpy(out->md_data, in->md_data, in->digest->ctx_size); + } out->pctx = pctx; out->pctx_ops = in->pctx_ops; assert(out->pctx == NULL || out->pctx_ops != NULL); diff --git a/Sources/CNIOBoringSSL/crypto/hrss/asm/poly_rq_mul.S b/Sources/CNIOBoringSSL/crypto/hrss/asm/poly_rq_mul.S index a9682952..32257d53 100644 --- a/Sources/CNIOBoringSSL/crypto/hrss/asm/poly_rq_mul.S +++ b/Sources/CNIOBoringSSL/crypto/hrss/asm/poly_rq_mul.S @@ -1,4 +1,5 @@ #define BORINGSSL_PREFIX CNIOBoringSSL +#if defined(__x86_64__) && defined(__linux__) // Copyright (c) 2017, the HRSS authors. // // Permission to use, copy, modify, and/or distribute this software for any @@ -8462,3 +8463,4 @@ ret .size poly_Rq_mul,.-poly_Rq_mul #endif +#endif // defined(__x86_64__) && defined(__linux__) diff --git a/Sources/CNIOBoringSSL/crypto/mem.c b/Sources/CNIOBoringSSL/crypto/mem.c index c7cc69f3..8db201a0 100644 --- a/Sources/CNIOBoringSSL/crypto/mem.c +++ b/Sources/CNIOBoringSSL/crypto/mem.c @@ -79,7 +79,10 @@ static void __asan_poison_memory_region(const void *addr, size_t size) {} static void __asan_unpoison_memory_region(const void *addr, size_t size) {} #endif -#if defined(__GNUC__) || defined(__clang__) +// Windows doesn't really support weak symbols as of May 2019, and Clang on +// Windows will emit strong symbols instead. See +// https://bugs.llvm.org/show_bug.cgi?id=37598 +#if defined(__GNUC__) || (defined(__clang__) && !defined(_MSC_VER)) // sdallocx is a sized |free| function. By passing the size (which we happen to // always know in BoringSSL), the malloc implementation can save work. 
We cannot // depend on |sdallocx| being available so we declare a wrapper that falls back diff --git a/Sources/CNIOBoringSSL/crypto/pkcs8/p5_pbev2.c b/Sources/CNIOBoringSSL/crypto/pkcs8/p5_pbev2.c index 4a31e2ea..ced37d10 100644 --- a/Sources/CNIOBoringSSL/crypto/pkcs8/p5_pbev2.c +++ b/Sources/CNIOBoringSSL/crypto/pkcs8/p5_pbev2.c @@ -81,6 +81,10 @@ static const uint8_t kPBES2[] = {0x2a, 0x86, 0x48, 0x86, 0xf7, static const uint8_t kHMACWithSHA1[] = {0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x02, 0x07}; +// 1.2.840.113549.2.9 +static const uint8_t kHMACWithSHA256[] = {0x2a, 0x86, 0x48, 0x86, + 0xf7, 0x0d, 0x02, 0x09}; + static const struct { uint8_t oid[9]; uint8_t oid_len; @@ -140,18 +144,18 @@ static int add_cipher_oid(CBB *out, int nid) { } static int pkcs5_pbe2_cipher_init(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher, - unsigned iterations, const char *pass, - size_t pass_len, const uint8_t *salt, - size_t salt_len, const uint8_t *iv, - size_t iv_len, int enc) { + const EVP_MD *pbkdf2_md, unsigned iterations, + const char *pass, size_t pass_len, + const uint8_t *salt, size_t salt_len, + const uint8_t *iv, size_t iv_len, int enc) { if (iv_len != EVP_CIPHER_iv_length(cipher)) { OPENSSL_PUT_ERROR(PKCS8, PKCS8_R_ERROR_SETTING_CIPHER_PARAMS); return 0; } uint8_t key[EVP_MAX_KEY_LENGTH]; - int ret = PKCS5_PBKDF2_HMAC_SHA1(pass, pass_len, salt, salt_len, iterations, - EVP_CIPHER_key_length(cipher), key) && + int ret = PKCS5_PBKDF2_HMAC(pass, pass_len, salt, salt_len, iterations, + pbkdf2_md, EVP_CIPHER_key_length(cipher), key) && EVP_CipherInit_ex(ctx, cipher, NULL /* engine */, key, iv, enc); OPENSSL_cleanse(key, EVP_MAX_KEY_LENGTH); return ret; @@ -201,9 +205,9 @@ int PKCS5_pbe2_encrypt_init(CBB *out, EVP_CIPHER_CTX *ctx, return 0; } - return pkcs5_pbe2_cipher_init(ctx, cipher, iterations, pass, pass_len, salt, - salt_len, iv, EVP_CIPHER_iv_length(cipher), - 1 /* encrypt */); + return pkcs5_pbe2_cipher_init(ctx, cipher, EVP_sha1(), iterations, pass, + pass_len, salt, 
salt_len, iv, + EVP_CIPHER_iv_length(cipher), 1 /* encrypt */); } int PKCS5_pbe2_decrypt_init(const struct pbe_suite *suite, EVP_CIPHER_CTX *ctx, @@ -264,6 +268,7 @@ int PKCS5_pbe2_decrypt_init(const struct pbe_suite *suite, EVP_CIPHER_CTX *ctx, } } + const EVP_MD *md = EVP_sha1(); if (CBS_len(&pbkdf2_params) != 0) { CBS alg_id, prf; if (!CBS_get_asn1(&pbkdf2_params, &alg_id, CBS_ASN1_SEQUENCE) || @@ -273,14 +278,18 @@ int PKCS5_pbe2_decrypt_init(const struct pbe_suite *suite, EVP_CIPHER_CTX *ctx, return 0; } - // We only support hmacWithSHA1. It is the DEFAULT, so DER requires it be - // omitted, but we match OpenSSL in tolerating it being present. - if (!CBS_mem_equal(&prf, kHMACWithSHA1, sizeof(kHMACWithSHA1))) { + if (CBS_mem_equal(&prf, kHMACWithSHA1, sizeof(kHMACWithSHA1))) { + // hmacWithSHA1 is the DEFAULT, so DER requires it be omitted, but we + // match OpenSSL in tolerating it being present. + md = EVP_sha1(); + } else if (CBS_mem_equal(&prf, kHMACWithSHA256, sizeof(kHMACWithSHA256))) { + md = EVP_sha256(); + } else { OPENSSL_PUT_ERROR(PKCS8, PKCS8_R_UNSUPPORTED_PRF); return 0; } - // hmacWithSHA1 has a NULL parameter. + // All supported PRFs use a NULL parameter. 
CBS null; if (!CBS_get_asn1(&alg_id, &null, CBS_ASN1_NULL) || CBS_len(&null) != 0 || @@ -301,7 +310,7 @@ int PKCS5_pbe2_decrypt_init(const struct pbe_suite *suite, EVP_CIPHER_CTX *ctx, return 0; } - return pkcs5_pbe2_cipher_init(ctx, cipher, (unsigned)iterations, pass, + return pkcs5_pbe2_cipher_init(ctx, cipher, md, (unsigned)iterations, pass, pass_len, CBS_data(&salt), CBS_len(&salt), CBS_data(&iv), CBS_len(&iv), 0 /* decrypt */); } diff --git a/Sources/CNIOBoringSSL/crypto/pkcs8/pkcs8.c b/Sources/CNIOBoringSSL/crypto/pkcs8/pkcs8.c index 37bc15ec..84028224 100644 --- a/Sources/CNIOBoringSSL/crypto/pkcs8/pkcs8.c +++ b/Sources/CNIOBoringSSL/crypto/pkcs8/pkcs8.c @@ -486,6 +486,10 @@ int PKCS8_marshal_encrypted_private_key(CBB *out, int pbe_nid, goto err; } + // TODO(davidben): OpenSSL has since extended |pbe_nid| to control either the + // PBES1 scheme or the PBES2 PRF. E.g. passing |NID_hmacWithSHA256| will + // select PBES2 with HMAC-SHA256 as the PRF. Implement this if anything uses + // it. See 5693a30813a031d3921a016a870420e7eb93ec90 in OpenSSL. 
int alg_ok; if (pbe_nid == -1) { alg_ok = PKCS5_pbe2_encrypt_init(&epki, &ctx, cipher, (unsigned)iterations, diff --git a/Sources/CNIOBoringSSL/crypto/pkcs8/pkcs8_x509.c b/Sources/CNIOBoringSSL/crypto/pkcs8/pkcs8_x509.c index 54139716..e406c3d0 100644 --- a/Sources/CNIOBoringSSL/crypto/pkcs8/pkcs8_x509.c +++ b/Sources/CNIOBoringSSL/crypto/pkcs8/pkcs8_x509.c @@ -293,6 +293,10 @@ static int PKCS12_handle_sequence( return ret; } +// 1.2.840.113549.1.12.10.1.1 +static const uint8_t kKeyBag[] = {0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, + 0x01, 0x0c, 0x0a, 0x01, 0x01}; + // 1.2.840.113549.1.12.10.1.2 static const uint8_t kPKCS8ShroudedKeyBag[] = { 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x0c, 0x0a, 0x01, 0x02}; @@ -392,16 +396,20 @@ static int PKCS12_handle_safe_bag(CBS *safe_bag, struct pkcs12_context *ctx) { return 0; } - if (CBS_mem_equal(&bag_id, kPKCS8ShroudedKeyBag, - sizeof(kPKCS8ShroudedKeyBag))) { - // See RFC 7292, section 4.2.2. + const int is_key_bag = CBS_mem_equal(&bag_id, kKeyBag, sizeof(kKeyBag)); + const int is_shrouded_key_bag = CBS_mem_equal(&bag_id, kPKCS8ShroudedKeyBag, + sizeof(kPKCS8ShroudedKeyBag)); + if (is_key_bag || is_shrouded_key_bag) { + // See RFC 7292, section 4.2.1 and 4.2.2. if (*ctx->out_key) { OPENSSL_PUT_ERROR(PKCS8, PKCS8_R_MULTIPLE_PRIVATE_KEYS_IN_PKCS12); return 0; } - EVP_PKEY *pkey = PKCS8_parse_encrypted_private_key( - &wrapped_value, ctx->password, ctx->password_len); + EVP_PKEY *pkey = + is_key_bag ? 
EVP_parse_private_key(&wrapped_value) + : PKCS8_parse_encrypted_private_key( + &wrapped_value, ctx->password, ctx->password_len); if (pkey == NULL) { return 0; } diff --git a/Sources/CNIOBoringSSL/crypto/third_party/sike/asm/fp-armv8.ios.aarch64.S b/Sources/CNIOBoringSSL/crypto/third_party/sike/asm/fp-armv8.ios.aarch64.S new file mode 100644 index 00000000..ba83bada --- /dev/null +++ b/Sources/CNIOBoringSSL/crypto/third_party/sike/asm/fp-armv8.ios.aarch64.S @@ -0,0 +1,1196 @@ +#define BORINGSSL_PREFIX CNIOBoringSSL +#if defined(__aarch64__) && defined(__APPLE__) +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +#define OPENSSL_NO_ASM +#endif +#endif + +#if !defined(OPENSSL_NO_ASM) +#if defined(BORINGSSL_PREFIX) +#include +#endif +.section __TEXT,__const + +Lp503p1_nz_s8: +.quad 0x085BDA2211E7A0AC, 0x9BF6C87B7E7DAF13 +.quad 0x45C6BDDA77A4D01B, 0x4066F541811E1E60 + +Lp503x2: +.quad 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF +.quad 0x57FFFFFFFFFFFFFF, 0x2610B7B44423CF41 +.quad 0x3737ED90F6FCFB5E, 0xC08B8D7BB4EF49A0 +.quad 0x0080CDEA83023C3C + +.text +.globl _sike_mpmul +.private_extern _sike_mpmul +.align 4 +_sike_mpmul: + stp x29, x30, [sp,#-96]! 
+ add x29, sp, #0 + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + stp x27, x28, [sp,#80] + + ldp x3, x4, [x0] + ldp x5, x6, [x0,#16] + ldp x7, x8, [x0,#32] + ldp x9, x10, [x0,#48] + ldp x11, x12, [x1,#0] + ldp x13, x14, [x1,#16] + ldp x15, x16, [x1,#32] + ldp x17, x19, [x1,#48] + + // x3-x7 <- AH + AL, x7 <- carry + adds x3, x3, x7 + adcs x4, x4, x8 + adcs x5, x5, x9 + adcs x6, x6, x10 + adc x7, xzr, xzr + + // x11-x14 <- BH + BL, x8 <- carry + adds x11, x11, x15 + adcs x12, x12, x16 + adcs x13, x13, x17 + adcs x14, x14, x19 + adc x8, xzr, xzr + + // x9 <- combined carry + and x9, x7, x8 + // x7-x8 <- mask + sub x7, xzr, x7 + sub x8, xzr, x8 + + + // x15-x19 <- masked (BH + BL) + and x15, x11, x7 + and x16, x12, x7 + and x17, x13, x7 + and x19, x14, x7 + + // x20-x23 <- masked (AH + AL) + and x20, x3, x8 + and x21, x4, x8 + and x22, x5, x8 + and x23, x6, x8 + + // x15-x19, x7 <- masked (AH+AL) + masked (BH+BL), step 1 + adds x15, x15, x20 + adcs x16, x16, x21 + adcs x17, x17, x22 + adcs x19, x19, x23 + adc x7, x9, xzr + + // x8-x10,x20-x24 <- (AH+AL) x (BH+BL), low part + stp x3, x4, [x2,#0] + // A0-A1 <- AH + AL, T0 <- mask + adds x3, x3, x5 + adcs x4, x4, x6 + adc x25, xzr, xzr + + // C6, T1 <- BH + BL, C7 <- mask + adds x23, x11, x13 + adcs x26, x12, x14 + adc x24, xzr, xzr + + // C0-C1 <- masked (BH + BL) + sub x10, xzr, x25 + sub x20, xzr, x24 + and x8, x23, x10 + and x9, x26, x10 + + // C4-C5 <- masked (AH + AL), T0 <- combined carry + and x21, x3, x20 + and x22, x4, x20 + mul x10, x3, x23 + mul x20, x3, x26 + and x25, x25, x24 + + // C0-C1, T0 <- (AH+AL) x (BH+BL), part 1 + adds x8, x21, x8 + umulh x21, x3, x26 + adcs x9, x22, x9 + umulh x22, x3, x23 + adc x25, x25, xzr + + // C2-C5 <- (AH+AL) x (BH+BL), low part + mul x3, x4, x23 + umulh x23, x4, x23 + adds x20, x20, x22 + adc x21, x21, xzr + + mul x24, x4, x26 + umulh x26, x4, x26 + adds x20, x20, x3 + adcs x21, x21, x23 + adc x22, xzr, xzr + + adds 
x21, x21, x24 + adc x22, x22, x26 + + ldp x3, x4, [x2,#0] + + // C2-C5, T0 <- (AH+AL) x (BH+BL), final part + adds x21, x8, x21 + umulh x24, x3, x11 + umulh x26, x3, x12 + adcs x22, x9, x22 + mul x8, x3, x11 + mul x9, x3, x12 + adc x25, x25, xzr + + // C0-C1, T1, C7 <- AL x BL + mul x3, x4, x11 + umulh x11, x4, x11 + adds x9, x9, x24 + adc x26, x26, xzr + + mul x23, x4, x12 + umulh x12, x4, x12 + adds x9, x9, x3 + adcs x26, x26, x11 + adc x24, xzr, xzr + + adds x26, x26, x23 + adc x24, x24, x12 + + + // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL + mul x3, x5, x13 + umulh x11, x5, x13 + subs x10, x10, x8 + sbcs x20, x20, x9 + sbcs x21, x21, x26 + mul x4, x5, x14 + umulh x23, x5, x14 + sbcs x22, x22, x24 + sbc x25, x25, xzr + + // A0, A1, C6, B0 <- AH x BH + mul x5, x6, x13 + umulh x13, x6, x13 + adds x4, x4, x11 + adc x23, x23, xzr + + mul x12, x6, x14 + umulh x14, x6, x14 + adds x4, x4, x5 + adcs x23, x23, x13 + adc x11, xzr, xzr + + adds x23, x23, x12 + adc x11, x11, x14 + + + // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH + subs x10, x10, x3 + sbcs x20, x20, x4 + sbcs x21, x21, x23 + sbcs x22, x22, x11 + sbc x25, x25, xzr + + adds x10, x10, x26 + adcs x20, x20, x24 + adcs x21, x21, x3 + adcs x22, x22, x4 + adcs x23, x25, x23 + adc x24, x11, xzr + + + // x15-x19, x7 <- (AH+AL) x (BH+BL), final step + adds x15, x15, x21 + adcs x16, x16, x22 + adcs x17, x17, x23 + adcs x19, x19, x24 + adc x7, x7, xzr + + // Load AL + ldp x3, x4, [x0] + ldp x5, x6, [x0,#16] + // Load BL + ldp x11, x12, [x1,#0] + ldp x13, x14, [x1,#16] + + // Temporarily store x8,x9 in x2 + stp x8,x9, [x2,#0] + // x21-x28 <- AL x BL + // A0-A1 <- AH + AL, T0 <- mask + adds x3, x3, x5 + adcs x4, x4, x6 + adc x8, xzr, xzr + + // C6, T1 <- BH + BL, C7 <- mask + adds x27, x11, x13 + adcs x9, x12, x14 + adc x28, xzr, xzr + + // C0-C1 <- masked (BH + BL) + sub x23, xzr, x8 + sub x24, xzr, x28 + and x21, x27, x23 + and x22, x9, x23 + + // C4-C5 <- masked (AH + AL), T0 <- combined carry + and x25, x3, x24 + 
and x26, x4, x24 + mul x23, x3, x27 + mul x24, x3, x9 + and x8, x8, x28 + + // C0-C1, T0 <- (AH+AL) x (BH+BL), part 1 + adds x21, x25, x21 + umulh x25, x3, x9 + adcs x22, x26, x22 + umulh x26, x3, x27 + adc x8, x8, xzr + + // C2-C5 <- (AH+AL) x (BH+BL), low part + mul x3, x4, x27 + umulh x27, x4, x27 + adds x24, x24, x26 + adc x25, x25, xzr + + mul x28, x4, x9 + umulh x9, x4, x9 + adds x24, x24, x3 + adcs x25, x25, x27 + adc x26, xzr, xzr + + adds x25, x25, x28 + adc x26, x26, x9 + + ldp x3, x4, [x0,#0] + + // C2-C5, T0 <- (AH+AL) x (BH+BL), final part + adds x25, x21, x25 + umulh x28, x3, x11 + umulh x9, x3, x12 + adcs x26, x22, x26 + mul x21, x3, x11 + mul x22, x3, x12 + adc x8, x8, xzr + + // C0-C1, T1, C7 <- AL x BL + mul x3, x4, x11 + umulh x11, x4, x11 + adds x22, x22, x28 + adc x9, x9, xzr + + mul x27, x4, x12 + umulh x12, x4, x12 + adds x22, x22, x3 + adcs x9, x9, x11 + adc x28, xzr, xzr + + adds x9, x9, x27 + adc x28, x28, x12 + + + // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL + mul x3, x5, x13 + umulh x11, x5, x13 + subs x23, x23, x21 + sbcs x24, x24, x22 + sbcs x25, x25, x9 + mul x4, x5, x14 + umulh x27, x5, x14 + sbcs x26, x26, x28 + sbc x8, x8, xzr + + // A0, A1, C6, B0 <- AH x BH + mul x5, x6, x13 + umulh x13, x6, x13 + adds x4, x4, x11 + adc x27, x27, xzr + + mul x12, x6, x14 + umulh x14, x6, x14 + adds x4, x4, x5 + adcs x27, x27, x13 + adc x11, xzr, xzr + + adds x27, x27, x12 + adc x11, x11, x14 + + + // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH + subs x23, x23, x3 + sbcs x24, x24, x4 + sbcs x25, x25, x27 + sbcs x26, x26, x11 + sbc x8, x8, xzr + + adds x23, x23, x9 + adcs x24, x24, x28 + adcs x25, x25, x3 + adcs x26, x26, x4 + adcs x27, x8, x27 + adc x28, x11, xzr + + // Restore x8,x9 + ldp x8,x9, [x2,#0] + + // x8-x10,x20,x15-x17,x19 <- maskd (AH+AL) x (BH+BL) - ALxBL + subs x8, x8, x21 + sbcs x9, x9, x22 + sbcs x10, x10, x23 + sbcs x20, x20, x24 + sbcs x15, x15, x25 + sbcs x16, x16, x26 + sbcs x17, x17, x27 + sbcs x19, x19, x28 + sbc x7, x7, xzr 
+ + // Store ALxBL, low + stp x21, x22, [x2] + stp x23, x24, [x2,#16] + + // Load AH + ldp x3, x4, [x0,#32] + ldp x5, x6, [x0,#48] + // Load BH + ldp x11, x12, [x1,#32] + ldp x13, x14, [x1,#48] + + adds x8, x8, x25 + adcs x9, x9, x26 + adcs x10, x10, x27 + adcs x20, x20, x28 + adc x1, xzr, xzr + + add x0, x0, #32 + // Temporarily store x8,x9 in x2 + stp x8,x9, [x2,#32] + // x21-x28 <- AH x BH + // A0-A1 <- AH + AL, T0 <- mask + adds x3, x3, x5 + adcs x4, x4, x6 + adc x8, xzr, xzr + + // C6, T1 <- BH + BL, C7 <- mask + adds x27, x11, x13 + adcs x9, x12, x14 + adc x28, xzr, xzr + + // C0-C1 <- masked (BH + BL) + sub x23, xzr, x8 + sub x24, xzr, x28 + and x21, x27, x23 + and x22, x9, x23 + + // C4-C5 <- masked (AH + AL), T0 <- combined carry + and x25, x3, x24 + and x26, x4, x24 + mul x23, x3, x27 + mul x24, x3, x9 + and x8, x8, x28 + + // C0-C1, T0 <- (AH+AL) x (BH+BL), part 1 + adds x21, x25, x21 + umulh x25, x3, x9 + adcs x22, x26, x22 + umulh x26, x3, x27 + adc x8, x8, xzr + + // C2-C5 <- (AH+AL) x (BH+BL), low part + mul x3, x4, x27 + umulh x27, x4, x27 + adds x24, x24, x26 + adc x25, x25, xzr + + mul x28, x4, x9 + umulh x9, x4, x9 + adds x24, x24, x3 + adcs x25, x25, x27 + adc x26, xzr, xzr + + adds x25, x25, x28 + adc x26, x26, x9 + + ldp x3, x4, [x0,#0] + + // C2-C5, T0 <- (AH+AL) x (BH+BL), final part + adds x25, x21, x25 + umulh x28, x3, x11 + umulh x9, x3, x12 + adcs x26, x22, x26 + mul x21, x3, x11 + mul x22, x3, x12 + adc x8, x8, xzr + + // C0-C1, T1, C7 <- AL x BL + mul x3, x4, x11 + umulh x11, x4, x11 + adds x22, x22, x28 + adc x9, x9, xzr + + mul x27, x4, x12 + umulh x12, x4, x12 + adds x22, x22, x3 + adcs x9, x9, x11 + adc x28, xzr, xzr + + adds x9, x9, x27 + adc x28, x28, x12 + + + // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL + mul x3, x5, x13 + umulh x11, x5, x13 + subs x23, x23, x21 + sbcs x24, x24, x22 + sbcs x25, x25, x9 + mul x4, x5, x14 + umulh x27, x5, x14 + sbcs x26, x26, x28 + sbc x8, x8, xzr + + // A0, A1, C6, B0 <- AH x BH + mul x5, x6, x13 + 
umulh x13, x6, x13 + adds x4, x4, x11 + adc x27, x27, xzr + + mul x12, x6, x14 + umulh x14, x6, x14 + adds x4, x4, x5 + adcs x27, x27, x13 + adc x11, xzr, xzr + + adds x27, x27, x12 + adc x11, x11, x14 + + + // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH + subs x23, x23, x3 + sbcs x24, x24, x4 + sbcs x25, x25, x27 + sbcs x26, x26, x11 + sbc x8, x8, xzr + + adds x23, x23, x9 + adcs x24, x24, x28 + adcs x25, x25, x3 + adcs x26, x26, x4 + adcs x27, x8, x27 + adc x28, x11, xzr + + // Restore x8,x9 + ldp x8,x9, [x2,#32] + + neg x1, x1 + + // x8-x10,x20,x15-x17,x19 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH + subs x8, x8, x21 + sbcs x9, x9, x22 + sbcs x10, x10, x23 + sbcs x20, x20, x24 + sbcs x15, x15, x25 + sbcs x16, x16, x26 + sbcs x17, x17, x27 + sbcs x19, x19, x28 + sbc x7, x7, xzr + + // Store (AH+AL) x (BH+BL) - ALxBL - AHxBH, low + stp x8, x9, [x2,#32] + stp x10, x20, [x2,#48] + + adds x1, x1, #1 + adcs x15, x15, x21 + adcs x16, x16, x22 + adcs x17, x17, x23 + adcs x19, x19, x24 + adcs x25, x7, x25 + adcs x26, x26, xzr + adcs x27, x27, xzr + adc x28, x28, xzr + + stp x15, x16, [x2,#64] + stp x17, x19, [x2,#80] + stp x25, x26, [x2,#96] + stp x27, x28, [x2,#112] + + ldp x19, x20, [x29,#16] + ldp x21, x22, [x29,#32] + ldp x23, x24, [x29,#48] + ldp x25, x26, [x29,#64] + ldp x27, x28, [x29,#80] + ldp x29, x30, [sp],#96 + ret +.globl _sike_fprdc +.private_extern _sike_fprdc +.align 4 +_sike_fprdc: + stp x29, x30, [sp, #-112]! 
+ add x29, sp, #0 + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + stp x27, x28, [sp,#80] + str x1, [sp,#96] + + ldp x2, x3, [x0,#0] // a[0-1] + + // Load the prime constant + adrp x23, Lp503p1_nz_s8@PAGE + add x23, x23, Lp503p1_nz_s8@PAGEOFF + ldp x24, x25, [x23, #0] + ldp x26, x27, [x23, #16] + + // a[0-1] x .Lp503p1_nz_s8 --> result: x4:x9 + mul x4, x2, x24 // a[0] x Lp503p1_nz_s8[0] + umulh x7, x2, x24 + mul x5, x2, x25 // a[0] x Lp503p1_nz_s8[1] + umulh x6, x2, x25 + + mul x1, x3, x24 + umulh x10, x3, x24 + adds x5, x5, x7 + adc x6, x6, xzr + + mul x11, x2, x26 + umulh x19, x2, x26 + adds x5, x5, x1 + adcs x6, x6, x10 + adc x7, xzr, xzr + + mul x1, x3, x25 + umulh x10, x3, x25 + adds x6, x6, x11 + adcs x7, x7, x19 + adc x8, xzr, xzr + + mul x11, x2, x27 + umulh x19, x2, x27 + adds x6, x6, x1 + adcs x7, x7, x10 + adc x8, x8, xzr + + mul x1, x3, x26 + umulh x10, x3, x26 + adds x7, x7, x11 + adcs x8, x8, x19 + adc x9, xzr, xzr + + mul x11, x3, x27 + umulh x19, x3, x27 + adds x7, x7, x1 + adcs x8, x8, x10 + adc x9, x9, xzr + adds x8, x8, x11 + adc x9, x9, x19 + + + + ldp x2, x3, [x0,#16] // a[2] + ldp x12, x13, [x0,#32] + ldp x14, x15, [x0,#48] + + orr x10, xzr, x9, lsr #8 + lsl x9, x9, #56 + orr x9, x9, x8, lsr #8 + lsl x8, x8, #56 + orr x8, x8, x7, lsr #8 + lsl x7, x7, #56 + orr x7, x7, x6, lsr #8 + lsl x6, x6, #56 + orr x6, x6, x5, lsr #8 + lsl x5, x5, #56 + orr x5, x5, x4, lsr #8 + lsl x4, x4, #56 + + adds x3, x4, x3 // a[3] + adcs x12, x5, x12 // a[4] + adcs x13, x6, x13 + adcs x14, x7, x14 + adcs x15, x8, x15 + ldp x16, x17, [x0,#64] + ldp x28, x30, [x0,#80] + mul x4, x2, x24 // a[2] x Lp503p1_nz_s8[0] + umulh x7, x2, x24 + adcs x16, x9, x16 + adcs x17, x10, x17 + adcs x28, xzr, x28 + adcs x30, xzr, x30 + ldp x20, x21, [x0,#96] + ldp x22, x23, [x0,#112] + mul x5, x2, x25 // a[2] x Lp503p1_nz_s8[1] + umulh x6, x2, x25 + adcs x20, xzr, x20 + adcs x21, xzr, x21 + adcs x22, xzr, x22 + adc x23, xzr, x23 + + // 
a[2-3] x .Lp503p1_nz_s8 --> result: x4:x9 + mul x1, x3, x24 + umulh x10, x3, x24 + adds x5, x5, x7 + adc x6, x6, xzr + + mul x11, x2, x26 + umulh x19, x2, x26 + adds x5, x5, x1 + adcs x6, x6, x10 + adc x7, xzr, xzr + + mul x1, x3, x25 + umulh x10, x3, x25 + adds x6, x6, x11 + adcs x7, x7, x19 + adc x8, xzr, xzr + + mul x11, x2, x27 + umulh x19, x2, x27 + adds x6, x6, x1 + adcs x7, x7, x10 + adc x8, x8, xzr + + mul x1, x3, x26 + umulh x10, x3, x26 + adds x7, x7, x11 + adcs x8, x8, x19 + adc x9, xzr, xzr + + mul x11, x3, x27 + umulh x19, x3, x27 + adds x7, x7, x1 + adcs x8, x8, x10 + adc x9, x9, xzr + adds x8, x8, x11 + adc x9, x9, x19 + + + + orr x10, xzr, x9, lsr #8 + lsl x9, x9, #56 + orr x9, x9, x8, lsr #8 + lsl x8, x8, #56 + orr x8, x8, x7, lsr #8 + lsl x7, x7, #56 + orr x7, x7, x6, lsr #8 + lsl x6, x6, #56 + orr x6, x6, x5, lsr #8 + lsl x5, x5, #56 + orr x5, x5, x4, lsr #8 + lsl x4, x4, #56 + + adds x13, x4, x13 // a[5] + adcs x14, x5, x14 // a[6] + adcs x15, x6, x15 + adcs x16, x7, x16 + mul x4, x12, x24 // a[4] x Lp503p1_nz_s8[0] + umulh x7, x12, x24 + adcs x17, x8, x17 + adcs x28, x9, x28 + adcs x30, x10, x30 + adcs x20, xzr, x20 + mul x5, x12, x25 // a[4] x Lp503p1_nz_s8[1] + umulh x6, x12, x25 + adcs x21, xzr, x21 + adcs x22, xzr, x22 + adc x23, xzr, x23 + + // a[4-5] x .Lp503p1_nz_s8 --> result: x4:x9 + mul x1, x13, x24 + umulh x10, x13, x24 + adds x5, x5, x7 + adc x6, x6, xzr + + mul x11, x12, x26 + umulh x19, x12, x26 + adds x5, x5, x1 + adcs x6, x6, x10 + adc x7, xzr, xzr + + mul x1, x13, x25 + umulh x10, x13, x25 + adds x6, x6, x11 + adcs x7, x7, x19 + adc x8, xzr, xzr + + mul x11, x12, x27 + umulh x19, x12, x27 + adds x6, x6, x1 + adcs x7, x7, x10 + adc x8, x8, xzr + + mul x1, x13, x26 + umulh x10, x13, x26 + adds x7, x7, x11 + adcs x8, x8, x19 + adc x9, xzr, xzr + + mul x11, x13, x27 + umulh x19, x13, x27 + adds x7, x7, x1 + adcs x8, x8, x10 + adc x9, x9, xzr + adds x8, x8, x11 + adc x9, x9, x19 + + + + orr x10, xzr, x9, lsr #8 + lsl x9, x9, #56 + 
orr x9, x9, x8, lsr #8 + lsl x8, x8, #56 + orr x8, x8, x7, lsr #8 + lsl x7, x7, #56 + orr x7, x7, x6, lsr #8 + lsl x6, x6, #56 + orr x6, x6, x5, lsr #8 + lsl x5, x5, #56 + orr x5, x5, x4, lsr #8 + lsl x4, x4, #56 + + adds x15, x4, x15 // a[7] + adcs x16, x5, x16 // a[8] + adcs x17, x6, x17 + adcs x28, x7, x28 + mul x4, x14, x24 // a[6] x Lp503p1_nz_s8[0] + umulh x7, x14, x24 + adcs x30, x8, x30 + adcs x20, x9, x20 + adcs x21, x10, x21 + mul x5, x14, x25 // a[6] x Lp503p1_nz_s8[1] + umulh x6, x14, x25 + adcs x22, xzr, x22 + adc x23, xzr, x23 + + // a[6-7] x .Lp503p1_nz_s8 --> result: x4:x9 + mul x1, x15, x24 + umulh x10, x15, x24 + adds x5, x5, x7 + adc x6, x6, xzr + + mul x11, x14, x26 + umulh x19, x14, x26 + adds x5, x5, x1 + adcs x6, x6, x10 + adc x7, xzr, xzr + + mul x1, x15, x25 + umulh x10, x15, x25 + adds x6, x6, x11 + adcs x7, x7, x19 + adc x8, xzr, xzr + + mul x11, x14, x27 + umulh x19, x14, x27 + adds x6, x6, x1 + adcs x7, x7, x10 + adc x8, x8, xzr + + mul x1, x15, x26 + umulh x10, x15, x26 + adds x7, x7, x11 + adcs x8, x8, x19 + adc x9, xzr, xzr + + mul x11, x15, x27 + umulh x19, x15, x27 + adds x7, x7, x1 + adcs x8, x8, x10 + adc x9, x9, xzr + adds x8, x8, x11 + adc x9, x9, x19 + + + + orr x10, xzr, x9, lsr #8 + lsl x9, x9, #56 + orr x9, x9, x8, lsr #8 + lsl x8, x8, #56 + orr x8, x8, x7, lsr #8 + lsl x7, x7, #56 + orr x7, x7, x6, lsr #8 + lsl x6, x6, #56 + orr x6, x6, x5, lsr #8 + lsl x5, x5, #56 + orr x5, x5, x4, lsr #8 + lsl x4, x4, #56 + + adds x17, x4, x17 + adcs x28, x5, x28 + ldr x1, [sp,#96] + adcs x30, x6, x30 + adcs x20, x7, x20 + stp x16, x17, [x1,#0] // Final result + stp x28, x30, [x1,#16] + adcs x21, x8, x21 + adcs x22, x9, x22 + adc x23, x10, x23 + stp x20, x21, [x1,#32] + stp x22, x23, [x1,#48] + + ldp x19, x20, [x29,#16] + ldp x21, x22, [x29,#32] + ldp x23, x24, [x29,#48] + ldp x25, x26, [x29,#64] + ldp x27, x28, [x29,#80] + ldp x29, x30, [sp],#112 + ret + +.globl _sike_fpadd +.private_extern _sike_fpadd +.align 4 +_sike_fpadd: + stp 
x29,x30, [sp,#-16]! + add x29, sp, #0 + + ldp x3, x4, [x0,#0] + ldp x5, x6, [x0,#16] + ldp x11, x12, [x1,#0] + ldp x13, x14, [x1,#16] + + // Add a + b + adds x3, x3, x11 + adcs x4, x4, x12 + adcs x5, x5, x13 + adcs x6, x6, x14 + ldp x7, x8, [x0,#32] + ldp x9, x10, [x0,#48] + ldp x11, x12, [x1,#32] + ldp x13, x14, [x1,#48] + adcs x7, x7, x11 + adcs x8, x8, x12 + adcs x9, x9, x13 + adc x10, x10, x14 + + // Subtract 2xp503 + adrp x17, Lp503x2@PAGE + add x17, x17, Lp503x2@PAGEOFF + ldp x11, x12, [x17, #0] + ldp x13, x14, [x17, #16] + subs x3, x3, x11 + sbcs x4, x4, x12 + sbcs x5, x5, x12 + sbcs x6, x6, x13 + sbcs x7, x7, x14 + + ldp x15, x16, [x17, #32] + ldr x17, [x17, #48] + sbcs x8, x8, x15 + sbcs x9, x9, x16 + sbcs x10, x10, x17 + sbc x0, xzr, xzr // x0 can be reused now + + // Add 2xp503 anded with the mask in x0 + and x11, x11, x0 + and x12, x12, x0 + and x13, x13, x0 + and x14, x14, x0 + and x15, x15, x0 + and x16, x16, x0 + and x17, x17, x0 + + adds x3, x3, x11 + adcs x4, x4, x12 + adcs x5, x5, x12 + adcs x6, x6, x13 + adcs x7, x7, x14 + adcs x8, x8, x15 + adcs x9, x9, x16 + adc x10, x10, x17 + + stp x3, x4, [x2,#0] + stp x5, x6, [x2,#16] + stp x7, x8, [x2,#32] + stp x9, x10, [x2,#48] + + ldp x29, x30, [sp],#16 + ret + +.globl _sike_fpsub +.private_extern _sike_fpsub +.align 4 +_sike_fpsub: + stp x29, x30, [sp,#-16]! 
+ add x29, sp, #0 + + ldp x3, x4, [x0,#0] + ldp x5, x6, [x0,#16] + ldp x11, x12, [x1,#0] + ldp x13, x14, [x1,#16] + + // Subtract a - b + subs x3, x3, x11 + sbcs x4, x4, x12 + sbcs x5, x5, x13 + sbcs x6, x6, x14 + ldp x7, x8, [x0,#32] + ldp x11, x12, [x1,#32] + sbcs x7, x7, x11 + sbcs x8, x8, x12 + ldp x9, x10, [x0,#48] + ldp x11, x12, [x1,#48] + sbcs x9, x9, x11 + sbcs x10, x10, x12 + sbc x17, xzr, xzr + + // Add 2xp503 anded with the mask in x17 + adrp x16, Lp503x2@PAGE + add x16, x16, Lp503x2@PAGEOFF + + // First half + ldp x11, x12, [x16, #0] + ldp x13, x14, [x16, #16] + and x11, x11, x17 + and x12, x12, x17 + and x13, x13, x17 + adds x3, x3, x11 + adcs x4, x4, x12 + adcs x5, x5, x12 + adcs x6, x6, x13 + stp x3, x4, [x2,#0] + stp x5, x6, [x2,#16] + + // Second half + ldp x11, x12, [x16, #32] + ldr x13, [x16, #48] + and x14, x14, x17 + and x11, x11, x17 + and x12, x12, x17 + and x13, x13, x17 + adcs x7, x7, x14 + adcs x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x13 + stp x7, x8, [x2,#32] + stp x9, x10, [x2,#48] + + ldp x29, x30, [sp],#16 + ret +.globl _sike_mpadd_asm +.private_extern _sike_mpadd_asm +.align 4 +_sike_mpadd_asm: + stp x29, x30, [sp,#-16]! + add x29, sp, #0 + + ldp x3, x4, [x0,#0] + ldp x5, x6, [x0,#16] + ldp x11, x12, [x1,#0] + ldp x13, x14, [x1,#16] + + adds x3, x3, x11 + adcs x4, x4, x12 + adcs x5, x5, x13 + adcs x6, x6, x14 + ldp x7, x8, [x0,#32] + ldp x9, x10, [x0,#48] + ldp x11, x12, [x1,#32] + ldp x13, x14, [x1,#48] + adcs x7, x7, x11 + adcs x8, x8, x12 + adcs x9, x9, x13 + adc x10, x10, x14 + + stp x3, x4, [x2,#0] + stp x5, x6, [x2,#16] + stp x7, x8, [x2,#32] + stp x9, x10, [x2,#48] + + ldp x29, x30, [sp],#16 + ret +.globl _sike_mpadd503x2_asm +.private_extern _sike_mpadd503x2_asm +.align 4 +_sike_mpadd503x2_asm: + stp x29, x30, [sp,#-16]! 
+ add x29, sp, #0 + + ldp x3, x4, [x0,#0] + ldp x5, x6, [x0,#16] + ldp x11, x12, [x1,#0] + ldp x13, x14, [x1,#16] + adds x3, x3, x11 + adcs x4, x4, x12 + adcs x5, x5, x13 + adcs x6, x6, x14 + ldp x7, x8, [x0,#32] + ldp x9, x10, [x0,#48] + ldp x11, x12, [x1,#32] + ldp x13, x14, [x1,#48] + adcs x7, x7, x11 + adcs x8, x8, x12 + adcs x9, x9, x13 + adcs x10, x10, x14 + + stp x3, x4, [x2,#0] + stp x5, x6, [x2,#16] + stp x7, x8, [x2,#32] + stp x9, x10, [x2,#48] + + ldp x3, x4, [x0,#64] + ldp x5, x6, [x0,#80] + ldp x11, x12, [x1,#64] + ldp x13, x14, [x1,#80] + adcs x3, x3, x11 + adcs x4, x4, x12 + adcs x5, x5, x13 + adcs x6, x6, x14 + ldp x7, x8, [x0,#96] + ldp x9, x10, [x0,#112] + ldp x11, x12, [x1,#96] + ldp x13, x14, [x1,#112] + adcs x7, x7, x11 + adcs x8, x8, x12 + adcs x9, x9, x13 + adc x10, x10, x14 + + stp x3, x4, [x2,#64] + stp x5, x6, [x2,#80] + stp x7, x8, [x2,#96] + stp x9, x10, [x2,#112] + + ldp x29, x30, [sp],#16 + ret +.globl _sike_mpsubx2_asm +.private_extern _sike_mpsubx2_asm +.align 4 +_sike_mpsubx2_asm: + stp x29, x30, [sp,#-16]! 
+ add x29, sp, #0 + + ldp x3, x4, [x0,#0] + ldp x5, x6, [x0,#16] + ldp x11, x12, [x1,#0] + ldp x13, x14, [x1,#16] + subs x3, x3, x11 + sbcs x4, x4, x12 + sbcs x5, x5, x13 + sbcs x6, x6, x14 + ldp x7, x8, [x0,#32] + ldp x9, x10, [x0,#48] + ldp x11, x12, [x1,#32] + ldp x13, x14, [x1,#48] + sbcs x7, x7, x11 + sbcs x8, x8, x12 + sbcs x9, x9, x13 + sbcs x10, x10, x14 + + stp x3, x4, [x2,#0] + stp x5, x6, [x2,#16] + stp x7, x8, [x2,#32] + stp x9, x10, [x2,#48] + + ldp x3, x4, [x0,#64] + ldp x5, x6, [x0,#80] + ldp x11, x12, [x1,#64] + ldp x13, x14, [x1,#80] + sbcs x3, x3, x11 + sbcs x4, x4, x12 + sbcs x5, x5, x13 + sbcs x6, x6, x14 + ldp x7, x8, [x0,#96] + ldp x9, x10, [x0,#112] + ldp x11, x12, [x1,#96] + ldp x13, x14, [x1,#112] + sbcs x7, x7, x11 + sbcs x8, x8, x12 + sbcs x9, x9, x13 + sbcs x10, x10, x14 + sbc x0, xzr, xzr + + stp x3, x4, [x2,#64] + stp x5, x6, [x2,#80] + stp x7, x8, [x2,#96] + stp x9, x10, [x2,#112] + + ldp x29, x30, [sp],#16 + ret +.globl _sike_mpdblsubx2_asm +.private_extern _sike_mpdblsubx2_asm +.align 4 +_sike_mpdblsubx2_asm: + stp x29, x30, [sp, #-64]! 
+ add x29, sp, #0 + + stp x20, x21, [sp, #16] + stp x22, x23, [sp, #32] + str x24, [sp, #48] + + ldp x3, x4, [x2,#0] + ldp x5, x6, [x2,#16] + ldp x7, x8, [x2,#32] + ldp x9, x10, [x2,#48] + ldp x11, x12, [x2,#64] + ldp x13, x14, [x2,#80] + ldp x15, x16, [x2,#96] + ldp x17, x24, [x2,#112] + + ldp x20, x21, [x0,#0] + ldp x22, x23, [x0,#16] + subs x3, x3, x20 + sbcs x4, x4, x21 + sbcs x5, x5, x22 + sbcs x6, x6, x23 + ldp x20, x21, [x0,#32] + ldp x22, x23, [x0,#48] + sbcs x7, x7, x20 + sbcs x8, x8, x21 + sbcs x9, x9, x22 + sbcs x10, x10, x23 + ldp x20, x21, [x0,#64] + ldp x22, x23, [x0,#80] + sbcs x11, x11, x20 + sbcs x12, x12, x21 + sbcs x13, x13, x22 + sbcs x14, x14, x23 + ldp x20, x21, [x0,#96] + ldp x22, x23, [x0,#112] + sbcs x15, x15, x20 + sbcs x16, x16, x21 + sbcs x17, x17, x22 + sbc x24, x24, x23 + + ldp x20, x21, [x1,#0] + ldp x22, x23, [x1,#16] + subs x3, x3, x20 + sbcs x4, x4, x21 + sbcs x5, x5, x22 + sbcs x6, x6, x23 + ldp x20, x21, [x1,#32] + ldp x22, x23, [x1,#48] + sbcs x7, x7, x20 + sbcs x8, x8, x21 + sbcs x9, x9, x22 + sbcs x10, x10, x23 + ldp x20, x21, [x1,#64] + ldp x22, x23, [x1,#80] + sbcs x11, x11, x20 + sbcs x12, x12, x21 + sbcs x13, x13, x22 + sbcs x14, x14, x23 + ldp x20, x21, [x1,#96] + ldp x22, x23, [x1,#112] + sbcs x15, x15, x20 + sbcs x16, x16, x21 + sbcs x17, x17, x22 + sbc x24, x24, x23 + + stp x3, x4, [x2,#0] + stp x5, x6, [x2,#16] + stp x7, x8, [x2,#32] + stp x9, x10, [x2,#48] + stp x11, x12, [x2,#64] + stp x13, x14, [x2,#80] + stp x15, x16, [x2,#96] + stp x17, x24, [x2,#112] + + ldp x20, x21, [x29,#16] + ldp x22, x23, [x29,#32] + ldr x24, [x29,#48] + + ldp x29, x30, [sp],#64 + ret +#endif // !OPENSSL_NO_ASM +#endif // defined(__aarch64__) && defined(__APPLE__) diff --git a/Sources/CNIOBoringSSL/crypto/third_party/sike/asm/fp-armv8.linux.aarch64.S b/Sources/CNIOBoringSSL/crypto/third_party/sike/asm/fp-armv8.linux.aarch64.S new file mode 100644 index 00000000..01f6532f --- /dev/null +++ 
b/Sources/CNIOBoringSSL/crypto/third_party/sike/asm/fp-armv8.linux.aarch64.S @@ -0,0 +1,1198 @@ +#define BORINGSSL_PREFIX CNIOBoringSSL +#if defined(__aarch64__) && defined(__linux__) +// This file is generated from a similarly-named Perl script in the BoringSSL +// source tree. Do not edit by hand. + +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +#define OPENSSL_NO_ASM +#endif +#endif + +#if !defined(OPENSSL_NO_ASM) +#if defined(__aarch64__) +#if defined(BORINGSSL_PREFIX) +#include +#endif +.section .rodata + +.Lp503p1_nz_s8: +.quad 0x085BDA2211E7A0AC, 0x9BF6C87B7E7DAF13 +.quad 0x45C6BDDA77A4D01B, 0x4066F541811E1E60 + +.Lp503x2: +.quad 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF +.quad 0x57FFFFFFFFFFFFFF, 0x2610B7B44423CF41 +.quad 0x3737ED90F6FCFB5E, 0xC08B8D7BB4EF49A0 +.quad 0x0080CDEA83023C3C + +.text +.globl sike_mpmul +.hidden sike_mpmul +.align 4 +sike_mpmul: + stp x29, x30, [sp,#-96]! + add x29, sp, #0 + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + stp x27, x28, [sp,#80] + + ldp x3, x4, [x0] + ldp x5, x6, [x0,#16] + ldp x7, x8, [x0,#32] + ldp x9, x10, [x0,#48] + ldp x11, x12, [x1,#0] + ldp x13, x14, [x1,#16] + ldp x15, x16, [x1,#32] + ldp x17, x19, [x1,#48] + + // x3-x7 <- AH + AL, x7 <- carry + adds x3, x3, x7 + adcs x4, x4, x8 + adcs x5, x5, x9 + adcs x6, x6, x10 + adc x7, xzr, xzr + + // x11-x14 <- BH + BL, x8 <- carry + adds x11, x11, x15 + adcs x12, x12, x16 + adcs x13, x13, x17 + adcs x14, x14, x19 + adc x8, xzr, xzr + + // x9 <- combined carry + and x9, x7, x8 + // x7-x8 <- mask + sub x7, xzr, x7 + sub x8, xzr, x8 + + + // x15-x19 <- masked (BH + BL) + and x15, x11, x7 + and x16, x12, x7 + and x17, x13, x7 + and x19, x14, x7 + + // x20-x23 <- masked (AH + AL) + and x20, x3, x8 + and x21, x4, x8 + and x22, x5, x8 + and x23, x6, x8 + + // x15-x19, x7 <- masked (AH+AL) + masked (BH+BL), step 1 + adds x15, x15, x20 + adcs x16, x16, x21 + adcs x17, x17, x22 + 
adcs x19, x19, x23 + adc x7, x9, xzr + + // x8-x10,x20-x24 <- (AH+AL) x (BH+BL), low part + stp x3, x4, [x2,#0] + // A0-A1 <- AH + AL, T0 <- mask + adds x3, x3, x5 + adcs x4, x4, x6 + adc x25, xzr, xzr + + // C6, T1 <- BH + BL, C7 <- mask + adds x23, x11, x13 + adcs x26, x12, x14 + adc x24, xzr, xzr + + // C0-C1 <- masked (BH + BL) + sub x10, xzr, x25 + sub x20, xzr, x24 + and x8, x23, x10 + and x9, x26, x10 + + // C4-C5 <- masked (AH + AL), T0 <- combined carry + and x21, x3, x20 + and x22, x4, x20 + mul x10, x3, x23 + mul x20, x3, x26 + and x25, x25, x24 + + // C0-C1, T0 <- (AH+AL) x (BH+BL), part 1 + adds x8, x21, x8 + umulh x21, x3, x26 + adcs x9, x22, x9 + umulh x22, x3, x23 + adc x25, x25, xzr + + // C2-C5 <- (AH+AL) x (BH+BL), low part + mul x3, x4, x23 + umulh x23, x4, x23 + adds x20, x20, x22 + adc x21, x21, xzr + + mul x24, x4, x26 + umulh x26, x4, x26 + adds x20, x20, x3 + adcs x21, x21, x23 + adc x22, xzr, xzr + + adds x21, x21, x24 + adc x22, x22, x26 + + ldp x3, x4, [x2,#0] + + // C2-C5, T0 <- (AH+AL) x (BH+BL), final part + adds x21, x8, x21 + umulh x24, x3, x11 + umulh x26, x3, x12 + adcs x22, x9, x22 + mul x8, x3, x11 + mul x9, x3, x12 + adc x25, x25, xzr + + // C0-C1, T1, C7 <- AL x BL + mul x3, x4, x11 + umulh x11, x4, x11 + adds x9, x9, x24 + adc x26, x26, xzr + + mul x23, x4, x12 + umulh x12, x4, x12 + adds x9, x9, x3 + adcs x26, x26, x11 + adc x24, xzr, xzr + + adds x26, x26, x23 + adc x24, x24, x12 + + + // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL + mul x3, x5, x13 + umulh x11, x5, x13 + subs x10, x10, x8 + sbcs x20, x20, x9 + sbcs x21, x21, x26 + mul x4, x5, x14 + umulh x23, x5, x14 + sbcs x22, x22, x24 + sbc x25, x25, xzr + + // A0, A1, C6, B0 <- AH x BH + mul x5, x6, x13 + umulh x13, x6, x13 + adds x4, x4, x11 + adc x23, x23, xzr + + mul x12, x6, x14 + umulh x14, x6, x14 + adds x4, x4, x5 + adcs x23, x23, x13 + adc x11, xzr, xzr + + adds x23, x23, x12 + adc x11, x11, x14 + + + // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH + subs x10, 
x10, x3 + sbcs x20, x20, x4 + sbcs x21, x21, x23 + sbcs x22, x22, x11 + sbc x25, x25, xzr + + adds x10, x10, x26 + adcs x20, x20, x24 + adcs x21, x21, x3 + adcs x22, x22, x4 + adcs x23, x25, x23 + adc x24, x11, xzr + + + // x15-x19, x7 <- (AH+AL) x (BH+BL), final step + adds x15, x15, x21 + adcs x16, x16, x22 + adcs x17, x17, x23 + adcs x19, x19, x24 + adc x7, x7, xzr + + // Load AL + ldp x3, x4, [x0] + ldp x5, x6, [x0,#16] + // Load BL + ldp x11, x12, [x1,#0] + ldp x13, x14, [x1,#16] + + // Temporarily store x8,x9 in x2 + stp x8,x9, [x2,#0] + // x21-x28 <- AL x BL + // A0-A1 <- AH + AL, T0 <- mask + adds x3, x3, x5 + adcs x4, x4, x6 + adc x8, xzr, xzr + + // C6, T1 <- BH + BL, C7 <- mask + adds x27, x11, x13 + adcs x9, x12, x14 + adc x28, xzr, xzr + + // C0-C1 <- masked (BH + BL) + sub x23, xzr, x8 + sub x24, xzr, x28 + and x21, x27, x23 + and x22, x9, x23 + + // C4-C5 <- masked (AH + AL), T0 <- combined carry + and x25, x3, x24 + and x26, x4, x24 + mul x23, x3, x27 + mul x24, x3, x9 + and x8, x8, x28 + + // C0-C1, T0 <- (AH+AL) x (BH+BL), part 1 + adds x21, x25, x21 + umulh x25, x3, x9 + adcs x22, x26, x22 + umulh x26, x3, x27 + adc x8, x8, xzr + + // C2-C5 <- (AH+AL) x (BH+BL), low part + mul x3, x4, x27 + umulh x27, x4, x27 + adds x24, x24, x26 + adc x25, x25, xzr + + mul x28, x4, x9 + umulh x9, x4, x9 + adds x24, x24, x3 + adcs x25, x25, x27 + adc x26, xzr, xzr + + adds x25, x25, x28 + adc x26, x26, x9 + + ldp x3, x4, [x0,#0] + + // C2-C5, T0 <- (AH+AL) x (BH+BL), final part + adds x25, x21, x25 + umulh x28, x3, x11 + umulh x9, x3, x12 + adcs x26, x22, x26 + mul x21, x3, x11 + mul x22, x3, x12 + adc x8, x8, xzr + + // C0-C1, T1, C7 <- AL x BL + mul x3, x4, x11 + umulh x11, x4, x11 + adds x22, x22, x28 + adc x9, x9, xzr + + mul x27, x4, x12 + umulh x12, x4, x12 + adds x22, x22, x3 + adcs x9, x9, x11 + adc x28, xzr, xzr + + adds x9, x9, x27 + adc x28, x28, x12 + + + // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL + mul x3, x5, x13 + umulh x11, x5, x13 + subs x23, x23, 
x21 + sbcs x24, x24, x22 + sbcs x25, x25, x9 + mul x4, x5, x14 + umulh x27, x5, x14 + sbcs x26, x26, x28 + sbc x8, x8, xzr + + // A0, A1, C6, B0 <- AH x BH + mul x5, x6, x13 + umulh x13, x6, x13 + adds x4, x4, x11 + adc x27, x27, xzr + + mul x12, x6, x14 + umulh x14, x6, x14 + adds x4, x4, x5 + adcs x27, x27, x13 + adc x11, xzr, xzr + + adds x27, x27, x12 + adc x11, x11, x14 + + + // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH + subs x23, x23, x3 + sbcs x24, x24, x4 + sbcs x25, x25, x27 + sbcs x26, x26, x11 + sbc x8, x8, xzr + + adds x23, x23, x9 + adcs x24, x24, x28 + adcs x25, x25, x3 + adcs x26, x26, x4 + adcs x27, x8, x27 + adc x28, x11, xzr + + // Restore x8,x9 + ldp x8,x9, [x2,#0] + + // x8-x10,x20,x15-x17,x19 <- maskd (AH+AL) x (BH+BL) - ALxBL + subs x8, x8, x21 + sbcs x9, x9, x22 + sbcs x10, x10, x23 + sbcs x20, x20, x24 + sbcs x15, x15, x25 + sbcs x16, x16, x26 + sbcs x17, x17, x27 + sbcs x19, x19, x28 + sbc x7, x7, xzr + + // Store ALxBL, low + stp x21, x22, [x2] + stp x23, x24, [x2,#16] + + // Load AH + ldp x3, x4, [x0,#32] + ldp x5, x6, [x0,#48] + // Load BH + ldp x11, x12, [x1,#32] + ldp x13, x14, [x1,#48] + + adds x8, x8, x25 + adcs x9, x9, x26 + adcs x10, x10, x27 + adcs x20, x20, x28 + adc x1, xzr, xzr + + add x0, x0, #32 + // Temporarily store x8,x9 in x2 + stp x8,x9, [x2,#32] + // x21-x28 <- AH x BH + // A0-A1 <- AH + AL, T0 <- mask + adds x3, x3, x5 + adcs x4, x4, x6 + adc x8, xzr, xzr + + // C6, T1 <- BH + BL, C7 <- mask + adds x27, x11, x13 + adcs x9, x12, x14 + adc x28, xzr, xzr + + // C0-C1 <- masked (BH + BL) + sub x23, xzr, x8 + sub x24, xzr, x28 + and x21, x27, x23 + and x22, x9, x23 + + // C4-C5 <- masked (AH + AL), T0 <- combined carry + and x25, x3, x24 + and x26, x4, x24 + mul x23, x3, x27 + mul x24, x3, x9 + and x8, x8, x28 + + // C0-C1, T0 <- (AH+AL) x (BH+BL), part 1 + adds x21, x25, x21 + umulh x25, x3, x9 + adcs x22, x26, x22 + umulh x26, x3, x27 + adc x8, x8, xzr + + // C2-C5 <- (AH+AL) x (BH+BL), low part + mul x3, x4, x27 + 
umulh x27, x4, x27 + adds x24, x24, x26 + adc x25, x25, xzr + + mul x28, x4, x9 + umulh x9, x4, x9 + adds x24, x24, x3 + adcs x25, x25, x27 + adc x26, xzr, xzr + + adds x25, x25, x28 + adc x26, x26, x9 + + ldp x3, x4, [x0,#0] + + // C2-C5, T0 <- (AH+AL) x (BH+BL), final part + adds x25, x21, x25 + umulh x28, x3, x11 + umulh x9, x3, x12 + adcs x26, x22, x26 + mul x21, x3, x11 + mul x22, x3, x12 + adc x8, x8, xzr + + // C0-C1, T1, C7 <- AL x BL + mul x3, x4, x11 + umulh x11, x4, x11 + adds x22, x22, x28 + adc x9, x9, xzr + + mul x27, x4, x12 + umulh x12, x4, x12 + adds x22, x22, x3 + adcs x9, x9, x11 + adc x28, xzr, xzr + + adds x9, x9, x27 + adc x28, x28, x12 + + + // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL + mul x3, x5, x13 + umulh x11, x5, x13 + subs x23, x23, x21 + sbcs x24, x24, x22 + sbcs x25, x25, x9 + mul x4, x5, x14 + umulh x27, x5, x14 + sbcs x26, x26, x28 + sbc x8, x8, xzr + + // A0, A1, C6, B0 <- AH x BH + mul x5, x6, x13 + umulh x13, x6, x13 + adds x4, x4, x11 + adc x27, x27, xzr + + mul x12, x6, x14 + umulh x14, x6, x14 + adds x4, x4, x5 + adcs x27, x27, x13 + adc x11, xzr, xzr + + adds x27, x27, x12 + adc x11, x11, x14 + + + // C2-C5, T0 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH + subs x23, x23, x3 + sbcs x24, x24, x4 + sbcs x25, x25, x27 + sbcs x26, x26, x11 + sbc x8, x8, xzr + + adds x23, x23, x9 + adcs x24, x24, x28 + adcs x25, x25, x3 + adcs x26, x26, x4 + adcs x27, x8, x27 + adc x28, x11, xzr + + // Restore x8,x9 + ldp x8,x9, [x2,#32] + + neg x1, x1 + + // x8-x10,x20,x15-x17,x19 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH + subs x8, x8, x21 + sbcs x9, x9, x22 + sbcs x10, x10, x23 + sbcs x20, x20, x24 + sbcs x15, x15, x25 + sbcs x16, x16, x26 + sbcs x17, x17, x27 + sbcs x19, x19, x28 + sbc x7, x7, xzr + + // Store (AH+AL) x (BH+BL) - ALxBL - AHxBH, low + stp x8, x9, [x2,#32] + stp x10, x20, [x2,#48] + + adds x1, x1, #1 + adcs x15, x15, x21 + adcs x16, x16, x22 + adcs x17, x17, x23 + adcs x19, x19, x24 + adcs x25, x7, x25 + adcs x26, x26, xzr + adcs x27, x27, 
xzr + adc x28, x28, xzr + + stp x15, x16, [x2,#64] + stp x17, x19, [x2,#80] + stp x25, x26, [x2,#96] + stp x27, x28, [x2,#112] + + ldp x19, x20, [x29,#16] + ldp x21, x22, [x29,#32] + ldp x23, x24, [x29,#48] + ldp x25, x26, [x29,#64] + ldp x27, x28, [x29,#80] + ldp x29, x30, [sp],#96 + ret +.globl sike_fprdc +.hidden sike_fprdc +.align 4 +sike_fprdc: + stp x29, x30, [sp, #-112]! + add x29, sp, #0 + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + stp x27, x28, [sp,#80] + str x1, [sp,#96] + + ldp x2, x3, [x0,#0] // a[0-1] + + // Load the prime constant + adrp x23, .Lp503p1_nz_s8 + add x23, x23, :lo12:.Lp503p1_nz_s8 + ldp x24, x25, [x23, #0] + ldp x26, x27, [x23, #16] + + // a[0-1] x .Lp503p1_nz_s8 --> result: x4:x9 + mul x4, x2, x24 // a[0] x .Lp503p1_nz_s8[0] + umulh x7, x2, x24 + mul x5, x2, x25 // a[0] x .Lp503p1_nz_s8[1] + umulh x6, x2, x25 + + mul x1, x3, x24 + umulh x10, x3, x24 + adds x5, x5, x7 + adc x6, x6, xzr + + mul x11, x2, x26 + umulh x19, x2, x26 + adds x5, x5, x1 + adcs x6, x6, x10 + adc x7, xzr, xzr + + mul x1, x3, x25 + umulh x10, x3, x25 + adds x6, x6, x11 + adcs x7, x7, x19 + adc x8, xzr, xzr + + mul x11, x2, x27 + umulh x19, x2, x27 + adds x6, x6, x1 + adcs x7, x7, x10 + adc x8, x8, xzr + + mul x1, x3, x26 + umulh x10, x3, x26 + adds x7, x7, x11 + adcs x8, x8, x19 + adc x9, xzr, xzr + + mul x11, x3, x27 + umulh x19, x3, x27 + adds x7, x7, x1 + adcs x8, x8, x10 + adc x9, x9, xzr + adds x8, x8, x11 + adc x9, x9, x19 + + + + ldp x2, x3, [x0,#16] // a[2] + ldp x12, x13, [x0,#32] + ldp x14, x15, [x0,#48] + + orr x10, xzr, x9, lsr #8 + lsl x9, x9, #56 + orr x9, x9, x8, lsr #8 + lsl x8, x8, #56 + orr x8, x8, x7, lsr #8 + lsl x7, x7, #56 + orr x7, x7, x6, lsr #8 + lsl x6, x6, #56 + orr x6, x6, x5, lsr #8 + lsl x5, x5, #56 + orr x5, x5, x4, lsr #8 + lsl x4, x4, #56 + + adds x3, x4, x3 // a[3] + adcs x12, x5, x12 // a[4] + adcs x13, x6, x13 + adcs x14, x7, x14 + adcs x15, x8, x15 + ldp x16, x17, [x0,#64] + 
ldp x28, x30, [x0,#80] + mul x4, x2, x24 // a[2] x .Lp503p1_nz_s8[0] + umulh x7, x2, x24 + adcs x16, x9, x16 + adcs x17, x10, x17 + adcs x28, xzr, x28 + adcs x30, xzr, x30 + ldp x20, x21, [x0,#96] + ldp x22, x23, [x0,#112] + mul x5, x2, x25 // a[2] x .Lp503p1_nz_s8[1] + umulh x6, x2, x25 + adcs x20, xzr, x20 + adcs x21, xzr, x21 + adcs x22, xzr, x22 + adc x23, xzr, x23 + + // a[2-3] x .Lp503p1_nz_s8 --> result: x4:x9 + mul x1, x3, x24 + umulh x10, x3, x24 + adds x5, x5, x7 + adc x6, x6, xzr + + mul x11, x2, x26 + umulh x19, x2, x26 + adds x5, x5, x1 + adcs x6, x6, x10 + adc x7, xzr, xzr + + mul x1, x3, x25 + umulh x10, x3, x25 + adds x6, x6, x11 + adcs x7, x7, x19 + adc x8, xzr, xzr + + mul x11, x2, x27 + umulh x19, x2, x27 + adds x6, x6, x1 + adcs x7, x7, x10 + adc x8, x8, xzr + + mul x1, x3, x26 + umulh x10, x3, x26 + adds x7, x7, x11 + adcs x8, x8, x19 + adc x9, xzr, xzr + + mul x11, x3, x27 + umulh x19, x3, x27 + adds x7, x7, x1 + adcs x8, x8, x10 + adc x9, x9, xzr + adds x8, x8, x11 + adc x9, x9, x19 + + + + orr x10, xzr, x9, lsr #8 + lsl x9, x9, #56 + orr x9, x9, x8, lsr #8 + lsl x8, x8, #56 + orr x8, x8, x7, lsr #8 + lsl x7, x7, #56 + orr x7, x7, x6, lsr #8 + lsl x6, x6, #56 + orr x6, x6, x5, lsr #8 + lsl x5, x5, #56 + orr x5, x5, x4, lsr #8 + lsl x4, x4, #56 + + adds x13, x4, x13 // a[5] + adcs x14, x5, x14 // a[6] + adcs x15, x6, x15 + adcs x16, x7, x16 + mul x4, x12, x24 // a[4] x .Lp503p1_nz_s8[0] + umulh x7, x12, x24 + adcs x17, x8, x17 + adcs x28, x9, x28 + adcs x30, x10, x30 + adcs x20, xzr, x20 + mul x5, x12, x25 // a[4] x .Lp503p1_nz_s8[1] + umulh x6, x12, x25 + adcs x21, xzr, x21 + adcs x22, xzr, x22 + adc x23, xzr, x23 + + // a[4-5] x .Lp503p1_nz_s8 --> result: x4:x9 + mul x1, x13, x24 + umulh x10, x13, x24 + adds x5, x5, x7 + adc x6, x6, xzr + + mul x11, x12, x26 + umulh x19, x12, x26 + adds x5, x5, x1 + adcs x6, x6, x10 + adc x7, xzr, xzr + + mul x1, x13, x25 + umulh x10, x13, x25 + adds x6, x6, x11 + adcs x7, x7, x19 + adc x8, xzr, xzr + + mul 
x11, x12, x27 + umulh x19, x12, x27 + adds x6, x6, x1 + adcs x7, x7, x10 + adc x8, x8, xzr + + mul x1, x13, x26 + umulh x10, x13, x26 + adds x7, x7, x11 + adcs x8, x8, x19 + adc x9, xzr, xzr + + mul x11, x13, x27 + umulh x19, x13, x27 + adds x7, x7, x1 + adcs x8, x8, x10 + adc x9, x9, xzr + adds x8, x8, x11 + adc x9, x9, x19 + + + + orr x10, xzr, x9, lsr #8 + lsl x9, x9, #56 + orr x9, x9, x8, lsr #8 + lsl x8, x8, #56 + orr x8, x8, x7, lsr #8 + lsl x7, x7, #56 + orr x7, x7, x6, lsr #8 + lsl x6, x6, #56 + orr x6, x6, x5, lsr #8 + lsl x5, x5, #56 + orr x5, x5, x4, lsr #8 + lsl x4, x4, #56 + + adds x15, x4, x15 // a[7] + adcs x16, x5, x16 // a[8] + adcs x17, x6, x17 + adcs x28, x7, x28 + mul x4, x14, x24 // a[6] x .Lp503p1_nz_s8[0] + umulh x7, x14, x24 + adcs x30, x8, x30 + adcs x20, x9, x20 + adcs x21, x10, x21 + mul x5, x14, x25 // a[6] x .Lp503p1_nz_s8[1] + umulh x6, x14, x25 + adcs x22, xzr, x22 + adc x23, xzr, x23 + + // a[6-7] x .Lp503p1_nz_s8 --> result: x4:x9 + mul x1, x15, x24 + umulh x10, x15, x24 + adds x5, x5, x7 + adc x6, x6, xzr + + mul x11, x14, x26 + umulh x19, x14, x26 + adds x5, x5, x1 + adcs x6, x6, x10 + adc x7, xzr, xzr + + mul x1, x15, x25 + umulh x10, x15, x25 + adds x6, x6, x11 + adcs x7, x7, x19 + adc x8, xzr, xzr + + mul x11, x14, x27 + umulh x19, x14, x27 + adds x6, x6, x1 + adcs x7, x7, x10 + adc x8, x8, xzr + + mul x1, x15, x26 + umulh x10, x15, x26 + adds x7, x7, x11 + adcs x8, x8, x19 + adc x9, xzr, xzr + + mul x11, x15, x27 + umulh x19, x15, x27 + adds x7, x7, x1 + adcs x8, x8, x10 + adc x9, x9, xzr + adds x8, x8, x11 + adc x9, x9, x19 + + + + orr x10, xzr, x9, lsr #8 + lsl x9, x9, #56 + orr x9, x9, x8, lsr #8 + lsl x8, x8, #56 + orr x8, x8, x7, lsr #8 + lsl x7, x7, #56 + orr x7, x7, x6, lsr #8 + lsl x6, x6, #56 + orr x6, x6, x5, lsr #8 + lsl x5, x5, #56 + orr x5, x5, x4, lsr #8 + lsl x4, x4, #56 + + adds x17, x4, x17 + adcs x28, x5, x28 + ldr x1, [sp,#96] + adcs x30, x6, x30 + adcs x20, x7, x20 + stp x16, x17, [x1,#0] // Final result + 
stp x28, x30, [x1,#16] + adcs x21, x8, x21 + adcs x22, x9, x22 + adc x23, x10, x23 + stp x20, x21, [x1,#32] + stp x22, x23, [x1,#48] + + ldp x19, x20, [x29,#16] + ldp x21, x22, [x29,#32] + ldp x23, x24, [x29,#48] + ldp x25, x26, [x29,#64] + ldp x27, x28, [x29,#80] + ldp x29, x30, [sp],#112 + ret + +.globl sike_fpadd +.hidden sike_fpadd +.align 4 +sike_fpadd: + stp x29,x30, [sp,#-16]! + add x29, sp, #0 + + ldp x3, x4, [x0,#0] + ldp x5, x6, [x0,#16] + ldp x11, x12, [x1,#0] + ldp x13, x14, [x1,#16] + + // Add a + b + adds x3, x3, x11 + adcs x4, x4, x12 + adcs x5, x5, x13 + adcs x6, x6, x14 + ldp x7, x8, [x0,#32] + ldp x9, x10, [x0,#48] + ldp x11, x12, [x1,#32] + ldp x13, x14, [x1,#48] + adcs x7, x7, x11 + adcs x8, x8, x12 + adcs x9, x9, x13 + adc x10, x10, x14 + + // Subtract 2xp503 + adrp x17, .Lp503x2 + add x17, x17, :lo12:.Lp503x2 + ldp x11, x12, [x17, #0] + ldp x13, x14, [x17, #16] + subs x3, x3, x11 + sbcs x4, x4, x12 + sbcs x5, x5, x12 + sbcs x6, x6, x13 + sbcs x7, x7, x14 + + ldp x15, x16, [x17, #32] + ldr x17, [x17, #48] + sbcs x8, x8, x15 + sbcs x9, x9, x16 + sbcs x10, x10, x17 + sbc x0, xzr, xzr // x0 can be reused now + + // Add 2xp503 anded with the mask in x0 + and x11, x11, x0 + and x12, x12, x0 + and x13, x13, x0 + and x14, x14, x0 + and x15, x15, x0 + and x16, x16, x0 + and x17, x17, x0 + + adds x3, x3, x11 + adcs x4, x4, x12 + adcs x5, x5, x12 + adcs x6, x6, x13 + adcs x7, x7, x14 + adcs x8, x8, x15 + adcs x9, x9, x16 + adc x10, x10, x17 + + stp x3, x4, [x2,#0] + stp x5, x6, [x2,#16] + stp x7, x8, [x2,#32] + stp x9, x10, [x2,#48] + + ldp x29, x30, [sp],#16 + ret + +.globl sike_fpsub +.hidden sike_fpsub +.align 4 +sike_fpsub: + stp x29, x30, [sp,#-16]! 
+ add x29, sp, #0 + + ldp x3, x4, [x0,#0] + ldp x5, x6, [x0,#16] + ldp x11, x12, [x1,#0] + ldp x13, x14, [x1,#16] + + // Subtract a - b + subs x3, x3, x11 + sbcs x4, x4, x12 + sbcs x5, x5, x13 + sbcs x6, x6, x14 + ldp x7, x8, [x0,#32] + ldp x11, x12, [x1,#32] + sbcs x7, x7, x11 + sbcs x8, x8, x12 + ldp x9, x10, [x0,#48] + ldp x11, x12, [x1,#48] + sbcs x9, x9, x11 + sbcs x10, x10, x12 + sbc x17, xzr, xzr + + // Add 2xp503 anded with the mask in x17 + adrp x16, .Lp503x2 + add x16, x16, :lo12:.Lp503x2 + + // First half + ldp x11, x12, [x16, #0] + ldp x13, x14, [x16, #16] + and x11, x11, x17 + and x12, x12, x17 + and x13, x13, x17 + adds x3, x3, x11 + adcs x4, x4, x12 + adcs x5, x5, x12 + adcs x6, x6, x13 + stp x3, x4, [x2,#0] + stp x5, x6, [x2,#16] + + // Second half + ldp x11, x12, [x16, #32] + ldr x13, [x16, #48] + and x14, x14, x17 + and x11, x11, x17 + and x12, x12, x17 + and x13, x13, x17 + adcs x7, x7, x14 + adcs x8, x8, x11 + adcs x9, x9, x12 + adc x10, x10, x13 + stp x7, x8, [x2,#32] + stp x9, x10, [x2,#48] + + ldp x29, x30, [sp],#16 + ret +.globl sike_mpadd_asm +.hidden sike_mpadd_asm +.align 4 +sike_mpadd_asm: + stp x29, x30, [sp,#-16]! + add x29, sp, #0 + + ldp x3, x4, [x0,#0] + ldp x5, x6, [x0,#16] + ldp x11, x12, [x1,#0] + ldp x13, x14, [x1,#16] + + adds x3, x3, x11 + adcs x4, x4, x12 + adcs x5, x5, x13 + adcs x6, x6, x14 + ldp x7, x8, [x0,#32] + ldp x9, x10, [x0,#48] + ldp x11, x12, [x1,#32] + ldp x13, x14, [x1,#48] + adcs x7, x7, x11 + adcs x8, x8, x12 + adcs x9, x9, x13 + adc x10, x10, x14 + + stp x3, x4, [x2,#0] + stp x5, x6, [x2,#16] + stp x7, x8, [x2,#32] + stp x9, x10, [x2,#48] + + ldp x29, x30, [sp],#16 + ret +.globl sike_mpadd503x2_asm +.hidden sike_mpadd503x2_asm +.align 4 +sike_mpadd503x2_asm: + stp x29, x30, [sp,#-16]! 
+ add x29, sp, #0 + + ldp x3, x4, [x0,#0] + ldp x5, x6, [x0,#16] + ldp x11, x12, [x1,#0] + ldp x13, x14, [x1,#16] + adds x3, x3, x11 + adcs x4, x4, x12 + adcs x5, x5, x13 + adcs x6, x6, x14 + ldp x7, x8, [x0,#32] + ldp x9, x10, [x0,#48] + ldp x11, x12, [x1,#32] + ldp x13, x14, [x1,#48] + adcs x7, x7, x11 + adcs x8, x8, x12 + adcs x9, x9, x13 + adcs x10, x10, x14 + + stp x3, x4, [x2,#0] + stp x5, x6, [x2,#16] + stp x7, x8, [x2,#32] + stp x9, x10, [x2,#48] + + ldp x3, x4, [x0,#64] + ldp x5, x6, [x0,#80] + ldp x11, x12, [x1,#64] + ldp x13, x14, [x1,#80] + adcs x3, x3, x11 + adcs x4, x4, x12 + adcs x5, x5, x13 + adcs x6, x6, x14 + ldp x7, x8, [x0,#96] + ldp x9, x10, [x0,#112] + ldp x11, x12, [x1,#96] + ldp x13, x14, [x1,#112] + adcs x7, x7, x11 + adcs x8, x8, x12 + adcs x9, x9, x13 + adc x10, x10, x14 + + stp x3, x4, [x2,#64] + stp x5, x6, [x2,#80] + stp x7, x8, [x2,#96] + stp x9, x10, [x2,#112] + + ldp x29, x30, [sp],#16 + ret +.globl sike_mpsubx2_asm +.hidden sike_mpsubx2_asm +.align 4 +sike_mpsubx2_asm: + stp x29, x30, [sp,#-16]! 
+ add x29, sp, #0 + + ldp x3, x4, [x0,#0] + ldp x5, x6, [x0,#16] + ldp x11, x12, [x1,#0] + ldp x13, x14, [x1,#16] + subs x3, x3, x11 + sbcs x4, x4, x12 + sbcs x5, x5, x13 + sbcs x6, x6, x14 + ldp x7, x8, [x0,#32] + ldp x9, x10, [x0,#48] + ldp x11, x12, [x1,#32] + ldp x13, x14, [x1,#48] + sbcs x7, x7, x11 + sbcs x8, x8, x12 + sbcs x9, x9, x13 + sbcs x10, x10, x14 + + stp x3, x4, [x2,#0] + stp x5, x6, [x2,#16] + stp x7, x8, [x2,#32] + stp x9, x10, [x2,#48] + + ldp x3, x4, [x0,#64] + ldp x5, x6, [x0,#80] + ldp x11, x12, [x1,#64] + ldp x13, x14, [x1,#80] + sbcs x3, x3, x11 + sbcs x4, x4, x12 + sbcs x5, x5, x13 + sbcs x6, x6, x14 + ldp x7, x8, [x0,#96] + ldp x9, x10, [x0,#112] + ldp x11, x12, [x1,#96] + ldp x13, x14, [x1,#112] + sbcs x7, x7, x11 + sbcs x8, x8, x12 + sbcs x9, x9, x13 + sbcs x10, x10, x14 + sbc x0, xzr, xzr + + stp x3, x4, [x2,#64] + stp x5, x6, [x2,#80] + stp x7, x8, [x2,#96] + stp x9, x10, [x2,#112] + + ldp x29, x30, [sp],#16 + ret +.globl sike_mpdblsubx2_asm +.hidden sike_mpdblsubx2_asm +.align 4 +sike_mpdblsubx2_asm: + stp x29, x30, [sp, #-64]! 
+ add x29, sp, #0 + + stp x20, x21, [sp, #16] + stp x22, x23, [sp, #32] + str x24, [sp, #48] + + ldp x3, x4, [x2,#0] + ldp x5, x6, [x2,#16] + ldp x7, x8, [x2,#32] + ldp x9, x10, [x2,#48] + ldp x11, x12, [x2,#64] + ldp x13, x14, [x2,#80] + ldp x15, x16, [x2,#96] + ldp x17, x24, [x2,#112] + + ldp x20, x21, [x0,#0] + ldp x22, x23, [x0,#16] + subs x3, x3, x20 + sbcs x4, x4, x21 + sbcs x5, x5, x22 + sbcs x6, x6, x23 + ldp x20, x21, [x0,#32] + ldp x22, x23, [x0,#48] + sbcs x7, x7, x20 + sbcs x8, x8, x21 + sbcs x9, x9, x22 + sbcs x10, x10, x23 + ldp x20, x21, [x0,#64] + ldp x22, x23, [x0,#80] + sbcs x11, x11, x20 + sbcs x12, x12, x21 + sbcs x13, x13, x22 + sbcs x14, x14, x23 + ldp x20, x21, [x0,#96] + ldp x22, x23, [x0,#112] + sbcs x15, x15, x20 + sbcs x16, x16, x21 + sbcs x17, x17, x22 + sbc x24, x24, x23 + + ldp x20, x21, [x1,#0] + ldp x22, x23, [x1,#16] + subs x3, x3, x20 + sbcs x4, x4, x21 + sbcs x5, x5, x22 + sbcs x6, x6, x23 + ldp x20, x21, [x1,#32] + ldp x22, x23, [x1,#48] + sbcs x7, x7, x20 + sbcs x8, x8, x21 + sbcs x9, x9, x22 + sbcs x10, x10, x23 + ldp x20, x21, [x1,#64] + ldp x22, x23, [x1,#80] + sbcs x11, x11, x20 + sbcs x12, x12, x21 + sbcs x13, x13, x22 + sbcs x14, x14, x23 + ldp x20, x21, [x1,#96] + ldp x22, x23, [x1,#112] + sbcs x15, x15, x20 + sbcs x16, x16, x21 + sbcs x17, x17, x22 + sbc x24, x24, x23 + + stp x3, x4, [x2,#0] + stp x5, x6, [x2,#16] + stp x7, x8, [x2,#32] + stp x9, x10, [x2,#48] + stp x11, x12, [x2,#64] + stp x13, x14, [x2,#80] + stp x15, x16, [x2,#96] + stp x17, x24, [x2,#112] + + ldp x20, x21, [x29,#16] + ldp x22, x23, [x29,#32] + ldr x24, [x29,#48] + + ldp x29, x30, [sp],#64 + ret +#endif +#endif // !OPENSSL_NO_ASM +#endif // defined(__aarch64__) && defined(__linux__) diff --git a/Sources/CNIOBoringSSL/crypto/third_party/sike/asm/fp-x86_64.linux.x86_64.S b/Sources/CNIOBoringSSL/crypto/third_party/sike/asm/fp-x86_64.linux.x86_64.S new file mode 100644 index 00000000..25069c5a --- /dev/null +++ 
b/Sources/CNIOBoringSSL/crypto/third_party/sike/asm/fp-x86_64.linux.x86_64.S @@ -0,0 +1,2089 @@ +#define BORINGSSL_PREFIX CNIOBoringSSL +#if defined(__x86_64__) && defined(__linux__) +# This file is generated from a similarly-named Perl script in the BoringSSL +# source tree. Do not edit by hand. + +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +#define OPENSSL_NO_ASM +#endif +#endif + +#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) +#if defined(BORINGSSL_PREFIX) +#include +#endif +.text + + +.Lp503x2: +.quad 0xFFFFFFFFFFFFFFFE +.quad 0xFFFFFFFFFFFFFFFF +.quad 0x57FFFFFFFFFFFFFF +.quad 0x2610B7B44423CF41 +.quad 0x3737ED90F6FCFB5E +.quad 0xC08B8D7BB4EF49A0 +.quad 0x0080CDEA83023C3C + + +.Lp503p1: +.quad 0xAC00000000000000 +.quad 0x13085BDA2211E7A0 +.quad 0x1B9BF6C87B7E7DAF +.quad 0x6045C6BDDA77A4D0 +.quad 0x004066F541811E1E + +.Lp503p1_nz: +.quad 0xAC00000000000000 +.quad 0x13085BDA2211E7A0 +.quad 0x1B9BF6C87B7E7DAF +.quad 0x6045C6BDDA77A4D0 +.quad 0x004066F541811E1E + +.extern OPENSSL_ia32cap_P +.hidden OPENSSL_ia32cap_P +.hidden OPENSSL_ia32cap_P + +.globl sike_fpadd +.hidden sike_fpadd +.type sike_fpadd,@function +sike_fpadd: +.cfi_startproc + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset r12, -16 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset r13, -24 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset r14, -32 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset r15, -40 + + xorq %rax,%rax + + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 + + addq 0(%rsi),%r8 + adcq 8(%rsi),%r9 + adcq 16(%rsi),%r10 + adcq 24(%rsi),%r11 + adcq 32(%rsi),%r12 + adcq 40(%rsi),%r13 + adcq 48(%rsi),%r14 + adcq 56(%rsi),%r15 + + movq .Lp503x2(%rip),%rcx; + subq %rcx,%r8 + movq 8+.Lp503x2(%rip),%rcx; + sbbq %rcx,%r9 + sbbq %rcx,%r10 + movq 16+.Lp503x2(%rip),%rcx; + sbbq %rcx,%r11 + movq 24+.Lp503x2(%rip),%rcx; + 
sbbq %rcx,%r12 + movq 32+.Lp503x2(%rip),%rcx; + sbbq %rcx,%r13 + movq 40+.Lp503x2(%rip),%rcx; + sbbq %rcx,%r14 + movq 48+.Lp503x2(%rip),%rcx; + sbbq %rcx,%r15 + sbbq $0,%rax + + movq .Lp503x2(%rip),%rdi + andq %rax,%rdi + movq 8+.Lp503x2(%rip),%rsi + andq %rax,%rsi + movq 16+.Lp503x2(%rip),%rcx + andq %rax,%rcx + + addq %rdi,%r8 + movq %r8,0(%rdx) + adcq %rsi,%r9 + movq %r9,8(%rdx) + adcq %rsi,%r10 + movq %r10,16(%rdx) + adcq %rcx,%r11 + movq %r11,24(%rdx) + + setc %cl + + movq 24+.Lp503x2(%rip),%r8 + andq %rax,%r8 + movq 32+.Lp503x2(%rip),%r9 + andq %rax,%r9 + movq 40+.Lp503x2(%rip),%r10 + andq %rax,%r10 + movq 48+.Lp503x2(%rip),%r11 + andq %rax,%r11 + + btq $0,%rcx + + adcq %r8,%r12 + movq %r12,32(%rdx) + adcq %r9,%r13 + movq %r13,40(%rdx) + adcq %r10,%r14 + movq %r14,48(%rdx) + adcq %r11,%r15 + movq %r15,56(%rdx) + + popq %r15 +.cfi_adjust_cfa_offset -8 + popq %r14 +.cfi_adjust_cfa_offset -8 + popq %r13 +.cfi_adjust_cfa_offset -8 + popq %r12 +.cfi_adjust_cfa_offset -8 + .byte 0xf3,0xc3 +.cfi_endproc +.globl sike_cswap_asm +.hidden sike_cswap_asm +.type sike_cswap_asm,@function +sike_cswap_asm: + + + movq %rdx,%xmm3 + + + + + + pshufd $68,%xmm3,%xmm3 + + movdqu 0(%rdi),%xmm0 + movdqu 0(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,0(%rdi) + movdqu %xmm1,0(%rsi) + + movdqu 16(%rdi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,16(%rdi) + movdqu %xmm1,16(%rsi) + + movdqu 32(%rdi),%xmm0 + movdqu 32(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,32(%rdi) + movdqu %xmm1,32(%rsi) + + movdqu 48(%rdi),%xmm0 + movdqu 48(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,48(%rdi) + movdqu %xmm1,48(%rsi) + + movdqu 64(%rdi),%xmm0 + movdqu 64(%rsi),%xmm1 + 
movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,64(%rdi) + movdqu %xmm1,64(%rsi) + + movdqu 80(%rdi),%xmm0 + movdqu 80(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,80(%rdi) + movdqu %xmm1,80(%rsi) + + movdqu 96(%rdi),%xmm0 + movdqu 96(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,96(%rdi) + movdqu %xmm1,96(%rsi) + + movdqu 112(%rdi),%xmm0 + movdqu 112(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,112(%rdi) + movdqu %xmm1,112(%rsi) + + movdqu 128(%rdi),%xmm0 + movdqu 128(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,128(%rdi) + movdqu %xmm1,128(%rsi) + + movdqu 144(%rdi),%xmm0 + movdqu 144(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,144(%rdi) + movdqu %xmm1,144(%rsi) + + movdqu 160(%rdi),%xmm0 + movdqu 160(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,160(%rdi) + movdqu %xmm1,160(%rsi) + + movdqu 176(%rdi),%xmm0 + movdqu 176(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,176(%rdi) + movdqu %xmm1,176(%rsi) + + movdqu 192(%rdi),%xmm0 + movdqu 192(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,192(%rdi) + movdqu %xmm1,192(%rsi) + + movdqu 208(%rdi),%xmm0 + movdqu 208(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,208(%rdi) + movdqu %xmm1,208(%rsi) + + movdqu 224(%rdi),%xmm0 + movdqu 224(%rsi),%xmm1 + movdqa %xmm1,%xmm2 
+ pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,224(%rdi) + movdqu %xmm1,224(%rsi) + + movdqu 240(%rdi),%xmm0 + movdqu 240(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,240(%rdi) + movdqu %xmm1,240(%rsi) + + .byte 0xf3,0xc3 +.globl sike_fpsub +.hidden sike_fpsub +.type sike_fpsub,@function +sike_fpsub: +.cfi_startproc + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset r12, -16 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset r13, -24 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset r14, -32 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset r15, -40 + + xorq %rax,%rax + + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 + + subq 0(%rsi),%r8 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + sbbq 24(%rsi),%r11 + sbbq 32(%rsi),%r12 + sbbq 40(%rsi),%r13 + sbbq 48(%rsi),%r14 + sbbq 56(%rsi),%r15 + sbbq $0x0,%rax + + movq .Lp503x2(%rip),%rdi + andq %rax,%rdi + movq 8+.Lp503x2(%rip),%rsi + andq %rax,%rsi + movq 16+.Lp503x2(%rip),%rcx + andq %rax,%rcx + + addq %rdi,%r8 + adcq %rsi,%r9 + adcq %rsi,%r10 + adcq %rcx,%r11 + movq %r8,0(%rdx) + movq %r9,8(%rdx) + movq %r10,16(%rdx) + movq %r11,24(%rdx) + + setc %cl + + movq 24+.Lp503x2(%rip),%r8 + andq %rax,%r8 + movq 32+.Lp503x2(%rip),%r9 + andq %rax,%r9 + movq 40+.Lp503x2(%rip),%r10 + andq %rax,%r10 + movq 48+.Lp503x2(%rip),%r11 + andq %rax,%r11 + + btq $0x0,%rcx + + adcq %r8,%r12 + adcq %r9,%r13 + adcq %r10,%r14 + adcq %r11,%r15 + movq %r12,32(%rdx) + movq %r13,40(%rdx) + movq %r14,48(%rdx) + movq %r15,56(%rdx) + + popq %r15 +.cfi_adjust_cfa_offset -8 + popq %r14 +.cfi_adjust_cfa_offset -8 + popq %r13 +.cfi_adjust_cfa_offset -8 + popq %r12 +.cfi_adjust_cfa_offset -8 + .byte 0xf3,0xc3 +.cfi_endproc +.globl sike_mpadd_asm +.hidden sike_mpadd_asm +.type sike_mpadd_asm,@function +sike_mpadd_asm: 
+.cfi_startproc + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + addq 0(%rsi),%r8 + adcq 8(%rsi),%r9 + adcq 16(%rsi),%r10 + adcq 24(%rsi),%r11 + movq %r8,0(%rdx) + movq %r9,8(%rdx) + movq %r10,16(%rdx) + movq %r11,24(%rdx) + + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + adcq 32(%rsi),%r8 + adcq 40(%rsi),%r9 + adcq 48(%rsi),%r10 + adcq 56(%rsi),%r11 + movq %r8,32(%rdx) + movq %r9,40(%rdx) + movq %r10,48(%rdx) + movq %r11,56(%rdx) + .byte 0xf3,0xc3 +.cfi_endproc +.globl sike_mpsubx2_asm +.hidden sike_mpsubx2_asm +.type sike_mpsubx2_asm,@function +sike_mpsubx2_asm: +.cfi_startproc + xorq %rax,%rax + + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%rcx + subq 0(%rsi),%r8 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + sbbq 24(%rsi),%r11 + sbbq 32(%rsi),%rcx + movq %r8,0(%rdx) + movq %r9,8(%rdx) + movq %r10,16(%rdx) + movq %r11,24(%rdx) + movq %rcx,32(%rdx) + + movq 40(%rdi),%r8 + movq 48(%rdi),%r9 + movq 56(%rdi),%r10 + movq 64(%rdi),%r11 + movq 72(%rdi),%rcx + sbbq 40(%rsi),%r8 + sbbq 48(%rsi),%r9 + sbbq 56(%rsi),%r10 + sbbq 64(%rsi),%r11 + sbbq 72(%rsi),%rcx + movq %r8,40(%rdx) + movq %r9,48(%rdx) + movq %r10,56(%rdx) + movq %r11,64(%rdx) + movq %rcx,72(%rdx) + + movq 80(%rdi),%r8 + movq 88(%rdi),%r9 + movq 96(%rdi),%r10 + movq 104(%rdi),%r11 + movq 112(%rdi),%rcx + sbbq 80(%rsi),%r8 + sbbq 88(%rsi),%r9 + sbbq 96(%rsi),%r10 + sbbq 104(%rsi),%r11 + sbbq 112(%rsi),%rcx + movq %r8,80(%rdx) + movq %r9,88(%rdx) + movq %r10,96(%rdx) + movq %r11,104(%rdx) + movq %rcx,112(%rdx) + + movq 120(%rdi),%r8 + sbbq 120(%rsi),%r8 + sbbq $0x0,%rax + movq %r8,120(%rdx) + .byte 0xf3,0xc3 +.cfi_endproc +.globl sike_mpdblsubx2_asm +.hidden sike_mpdblsubx2_asm +.type sike_mpdblsubx2_asm,@function +sike_mpdblsubx2_asm: +.cfi_startproc + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset r12, -16 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset r13, -24 + pushq %r14 
+.cfi_adjust_cfa_offset 8 +.cfi_offset r14, -32 + + xorq %rax,%rax + + movq 0(%rdx),%r8 + movq 8(%rdx),%r9 + movq 16(%rdx),%r10 + movq 24(%rdx),%r11 + movq 32(%rdx),%r12 + movq 40(%rdx),%r13 + movq 48(%rdx),%r14 + movq 56(%rdx),%rcx + subq 0(%rdi),%r8 + sbbq 8(%rdi),%r9 + sbbq 16(%rdi),%r10 + sbbq 24(%rdi),%r11 + sbbq 32(%rdi),%r12 + sbbq 40(%rdi),%r13 + sbbq 48(%rdi),%r14 + sbbq 56(%rdi),%rcx + adcq $0x0,%rax + + subq 0(%rsi),%r8 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + sbbq 24(%rsi),%r11 + sbbq 32(%rsi),%r12 + sbbq 40(%rsi),%r13 + sbbq 48(%rsi),%r14 + sbbq 56(%rsi),%rcx + adcq $0x0,%rax + + movq %r8,0(%rdx) + movq %r9,8(%rdx) + movq %r10,16(%rdx) + movq %r11,24(%rdx) + movq %r12,32(%rdx) + movq %r13,40(%rdx) + movq %r14,48(%rdx) + movq %rcx,56(%rdx) + + movq 64(%rdx),%r8 + movq 72(%rdx),%r9 + movq 80(%rdx),%r10 + movq 88(%rdx),%r11 + movq 96(%rdx),%r12 + movq 104(%rdx),%r13 + movq 112(%rdx),%r14 + movq 120(%rdx),%rcx + + subq %rax,%r8 + sbbq 64(%rdi),%r8 + sbbq 72(%rdi),%r9 + sbbq 80(%rdi),%r10 + sbbq 88(%rdi),%r11 + sbbq 96(%rdi),%r12 + sbbq 104(%rdi),%r13 + sbbq 112(%rdi),%r14 + sbbq 120(%rdi),%rcx + subq 64(%rsi),%r8 + sbbq 72(%rsi),%r9 + sbbq 80(%rsi),%r10 + sbbq 88(%rsi),%r11 + sbbq 96(%rsi),%r12 + sbbq 104(%rsi),%r13 + sbbq 112(%rsi),%r14 + sbbq 120(%rsi),%rcx + + movq %r8,64(%rdx) + movq %r9,72(%rdx) + movq %r10,80(%rdx) + movq %r11,88(%rdx) + movq %r12,96(%rdx) + movq %r13,104(%rdx) + movq %r14,112(%rdx) + movq %rcx,120(%rdx) + + popq %r14 +.cfi_adjust_cfa_offset -8 + popq %r13 +.cfi_adjust_cfa_offset -8 + popq %r12 +.cfi_adjust_cfa_offset -8 + .byte 0xf3,0xc3 +.cfi_endproc + +.Lmul_mulx: +.cfi_startproc + +.cfi_adjust_cfa_offset 32 +.cfi_offset r12, -16 +.cfi_offset r13, -24 +.cfi_offset r14, -32 +.cfi_offset r15, -40 + + movq %rdx,%rcx + + + xorq %rax,%rax + movq (%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + pushq %rbx + +.cfi_adjust_cfa_offset 8 +.cfi_offset rbx, -48 + pushq %rbp +.cfi_offset rbp, -56 
+.cfi_adjust_cfa_offset 8 + subq $96,%rsp +.cfi_adjust_cfa_offset 96 + addq 32(%rdi),%r8 + adcq 40(%rdi),%r9 + adcq 48(%rdi),%r10 + adcq 56(%rdi),%r11 + sbbq $0x0,%rax + movq %r8,(%rsp) + movq %r9,8(%rsp) + movq %r10,16(%rsp) + movq %r11,24(%rsp) + + + xorq %rbx,%rbx + movq (%rsi),%r12 + movq 8(%rsi),%r13 + movq 16(%rsi),%r14 + movq 24(%rsi),%r15 + addq 32(%rsi),%r12 + adcq 40(%rsi),%r13 + adcq 48(%rsi),%r14 + adcq 56(%rsi),%r15 + sbbq $0x0,%rbx + movq %r12,32(%rsp) + movq %r13,40(%rsp) + movq %r14,48(%rsp) + movq %r15,56(%rsp) + + + andq %rax,%r12 + andq %rax,%r13 + andq %rax,%r14 + andq %rax,%r15 + + + andq %rbx,%r8 + andq %rbx,%r9 + andq %rbx,%r10 + andq %rbx,%r11 + + + addq %r12,%r8 + adcq %r13,%r9 + adcq %r14,%r10 + adcq %r15,%r11 + movq %r8,64(%rsp) + movq %r9,72(%rsp) + movq %r10,80(%rsp) + movq %r11,88(%rsp) + + + movq 0+0(%rsp),%rdx + mulxq 32+0(%rsp),%r9,%r8 + movq %r9,64+0(%rcx) + mulxq 32+8(%rsp),%r10,%r9 + xorq %rax,%rax + adoxq %r10,%r8 + mulxq 32+16(%rsp),%r11,%r10 + adoxq %r11,%r9 + mulxq 32+24(%rsp),%r12,%r11 + adoxq %r12,%r10 + + movq 0+8(%rsp),%rdx + mulxq 32+0(%rsp),%r12,%r13 + adoxq %rax,%r11 + xorq %rax,%rax + mulxq 32+8(%rsp),%r15,%r14 + adoxq %r8,%r12 + movq %r12,64+8(%rcx) + adcxq %r15,%r13 + mulxq 32+16(%rsp),%rbx,%r15 + adcxq %rbx,%r14 + adoxq %r9,%r13 + mulxq 32+24(%rsp),%rbp,%rbx + adcxq %rbp,%r15 + adcxq %rax,%rbx + adoxq %r10,%r14 + + movq 0+16(%rsp),%rdx + mulxq 32+0(%rsp),%r8,%r9 + adoxq %r11,%r15 + adoxq %rax,%rbx + xorq %rax,%rax + mulxq 32+8(%rsp),%r11,%r10 + adoxq %r13,%r8 + movq %r8,64+16(%rcx) + adcxq %r11,%r9 + mulxq 32+16(%rsp),%r12,%r11 + adcxq %r12,%r10 + adoxq %r14,%r9 + mulxq 32+24(%rsp),%rbp,%r12 + adcxq %rbp,%r11 + + adcxq %rax,%r12 + adoxq %r15,%r10 + adoxq %rbx,%r11 + adoxq %rax,%r12 + + movq 0+24(%rsp),%rdx + mulxq 32+0(%rsp),%r8,%r13 + xorq %rax,%rax + mulxq 32+8(%rsp),%r15,%r14 + adcxq %r15,%r13 + adoxq %r8,%r9 + mulxq 32+16(%rsp),%rbx,%r15 + adcxq %rbx,%r14 + adoxq %r13,%r10 + mulxq 32+24(%rsp),%rbp,%rbx + adcxq 
%rbp,%r15 + adcxq %rax,%rbx + adoxq %r14,%r11 + adoxq %r15,%r12 + adoxq %rax,%rbx + movq %r9,64+24(%rcx) + movq %r10,64+32(%rcx) + movq %r11,64+40(%rcx) + movq %r12,64+48(%rcx) + movq %rbx,64+56(%rcx) + + + + movq 0+0(%rdi),%rdx + mulxq 0+0(%rsi),%r9,%r8 + movq %r9,0+0(%rcx) + mulxq 0+8(%rsi),%r10,%r9 + xorq %rax,%rax + adoxq %r10,%r8 + mulxq 0+16(%rsi),%r11,%r10 + adoxq %r11,%r9 + mulxq 0+24(%rsi),%r12,%r11 + adoxq %r12,%r10 + + movq 0+8(%rdi),%rdx + mulxq 0+0(%rsi),%r12,%r13 + adoxq %rax,%r11 + xorq %rax,%rax + mulxq 0+8(%rsi),%r15,%r14 + adoxq %r8,%r12 + movq %r12,0+8(%rcx) + adcxq %r15,%r13 + mulxq 0+16(%rsi),%rbx,%r15 + adcxq %rbx,%r14 + adoxq %r9,%r13 + mulxq 0+24(%rsi),%rbp,%rbx + adcxq %rbp,%r15 + adcxq %rax,%rbx + adoxq %r10,%r14 + + movq 0+16(%rdi),%rdx + mulxq 0+0(%rsi),%r8,%r9 + adoxq %r11,%r15 + adoxq %rax,%rbx + xorq %rax,%rax + mulxq 0+8(%rsi),%r11,%r10 + adoxq %r13,%r8 + movq %r8,0+16(%rcx) + adcxq %r11,%r9 + mulxq 0+16(%rsi),%r12,%r11 + adcxq %r12,%r10 + adoxq %r14,%r9 + mulxq 0+24(%rsi),%rbp,%r12 + adcxq %rbp,%r11 + + adcxq %rax,%r12 + adoxq %r15,%r10 + adoxq %rbx,%r11 + adoxq %rax,%r12 + + movq 0+24(%rdi),%rdx + mulxq 0+0(%rsi),%r8,%r13 + xorq %rax,%rax + mulxq 0+8(%rsi),%r15,%r14 + adcxq %r15,%r13 + adoxq %r8,%r9 + mulxq 0+16(%rsi),%rbx,%r15 + adcxq %rbx,%r14 + adoxq %r13,%r10 + mulxq 0+24(%rsi),%rbp,%rbx + adcxq %rbp,%r15 + adcxq %rax,%rbx + adoxq %r14,%r11 + adoxq %r15,%r12 + adoxq %rax,%rbx + movq %r9,0+24(%rcx) + movq %r10,0+32(%rcx) + movq %r11,0+40(%rcx) + movq %r12,0+48(%rcx) + movq %rbx,0+56(%rcx) + + + + movq 32+0(%rdi),%rdx + mulxq 32+0(%rsi),%r9,%r8 + movq %r9,0+0(%rsp) + mulxq 32+8(%rsi),%r10,%r9 + xorq %rax,%rax + adoxq %r10,%r8 + mulxq 32+16(%rsi),%r11,%r10 + adoxq %r11,%r9 + mulxq 32+24(%rsi),%r12,%r11 + adoxq %r12,%r10 + + movq 32+8(%rdi),%rdx + mulxq 32+0(%rsi),%r12,%r13 + adoxq %rax,%r11 + xorq %rax,%rax + mulxq 32+8(%rsi),%r15,%r14 + adoxq %r8,%r12 + movq %r12,0+8(%rsp) + adcxq %r15,%r13 + mulxq 32+16(%rsi),%rbx,%r15 + adcxq 
%rbx,%r14 + adoxq %r9,%r13 + mulxq 32+24(%rsi),%rbp,%rbx + adcxq %rbp,%r15 + adcxq %rax,%rbx + adoxq %r10,%r14 + + movq 32+16(%rdi),%rdx + mulxq 32+0(%rsi),%r8,%r9 + adoxq %r11,%r15 + adoxq %rax,%rbx + xorq %rax,%rax + mulxq 32+8(%rsi),%r11,%r10 + adoxq %r13,%r8 + movq %r8,0+16(%rsp) + adcxq %r11,%r9 + mulxq 32+16(%rsi),%r12,%r11 + adcxq %r12,%r10 + adoxq %r14,%r9 + mulxq 32+24(%rsi),%rbp,%r12 + adcxq %rbp,%r11 + + adcxq %rax,%r12 + adoxq %r15,%r10 + adoxq %rbx,%r11 + adoxq %rax,%r12 + + movq 32+24(%rdi),%rdx + mulxq 32+0(%rsi),%r8,%r13 + xorq %rax,%rax + mulxq 32+8(%rsi),%r15,%r14 + adcxq %r15,%r13 + adoxq %r8,%r9 + mulxq 32+16(%rsi),%rbx,%r15 + adcxq %rbx,%r14 + adoxq %r13,%r10 + mulxq 32+24(%rsi),%rbp,%rbx + adcxq %rbp,%r15 + adcxq %rax,%rbx + adoxq %r14,%r11 + adoxq %r15,%r12 + adoxq %rax,%rbx + movq %r9,0+24(%rsp) + movq %r10,0+32(%rsp) + movq %r11,0+40(%rsp) + movq %r12,0+48(%rsp) + movq %rbx,0+56(%rsp) + + + + + movq 64(%rsp),%r8 + movq 72(%rsp),%r9 + movq 80(%rsp),%r10 + movq 88(%rsp),%r11 + movq 96(%rcx),%rax + addq %rax,%r8 + movq 104(%rcx),%rax + adcq %rax,%r9 + movq 112(%rcx),%rax + adcq %rax,%r10 + movq 120(%rcx),%rax + adcq %rax,%r11 + + + movq 64(%rcx),%r12 + movq 72(%rcx),%r13 + movq 80(%rcx),%r14 + movq 88(%rcx),%r15 + subq (%rcx),%r12 + sbbq 8(%rcx),%r13 + sbbq 16(%rcx),%r14 + sbbq 24(%rcx),%r15 + sbbq 32(%rcx),%r8 + sbbq 40(%rcx),%r9 + sbbq 48(%rcx),%r10 + sbbq 56(%rcx),%r11 + + + subq (%rsp),%r12 + sbbq 8(%rsp),%r13 + sbbq 16(%rsp),%r14 + sbbq 24(%rsp),%r15 + sbbq 32(%rsp),%r8 + sbbq 40(%rsp),%r9 + sbbq 48(%rsp),%r10 + sbbq 56(%rsp),%r11 + + addq 32(%rcx),%r12 + movq %r12,32(%rcx) + adcq 40(%rcx),%r13 + movq %r13,40(%rcx) + adcq 48(%rcx),%r14 + movq %r14,48(%rcx) + adcq 56(%rcx),%r15 + movq %r15,56(%rcx) + movq (%rsp),%rax + adcq %rax,%r8 + movq %r8,64(%rcx) + movq 8(%rsp),%rax + adcq %rax,%r9 + movq %r9,72(%rcx) + movq 16(%rsp),%rax + adcq %rax,%r10 + movq %r10,80(%rcx) + movq 24(%rsp),%rax + adcq %rax,%r11 + movq %r11,88(%rcx) + movq 
32(%rsp),%r12 + adcq $0x0,%r12 + movq %r12,96(%rcx) + movq 40(%rsp),%r13 + adcq $0x0,%r13 + movq %r13,104(%rcx) + movq 48(%rsp),%r14 + adcq $0x0,%r14 + movq %r14,112(%rcx) + movq 56(%rsp),%r15 + adcq $0x0,%r15 + movq %r15,120(%rcx) + + addq $96,%rsp +.cfi_adjust_cfa_offset -96 + popq %rbp +.cfi_adjust_cfa_offset -8 +.cfi_same_value rbp + popq %rbx +.cfi_adjust_cfa_offset -8 +.cfi_same_value rbx + popq %r15 +.cfi_adjust_cfa_offset -8 +.cfi_same_value r15 + popq %r14 +.cfi_adjust_cfa_offset -8 +.cfi_same_value r14 + popq %r13 +.cfi_adjust_cfa_offset -8 +.cfi_same_value r13 + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_same_value r12 + .byte 0xf3,0xc3 +.cfi_endproc + +.globl sike_mpmul +.hidden sike_mpmul +.type sike_mpmul,@function +sike_mpmul: +.cfi_startproc + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset r12, -16 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset r13, -24 + pushq %r14 +.cfi_adjust_cfa_offset 8 +.cfi_offset r14, -32 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset r15, -40 + + leaq OPENSSL_ia32cap_P(%rip),%rcx + movq 8(%rcx),%rcx + andl $0x80100,%ecx + cmpl $0x80100,%ecx + je .Lmul_mulx + + + + movq %rdx,%rcx + + + xorq %rax,%rax + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + movq %r8,0(%rcx) + movq %r9,8(%rcx) + movq %r10,16(%rcx) + movq %r11,24(%rcx) + sbbq $0,%rax + subq $80,%rsp +.cfi_adjust_cfa_offset 80 + + + xorq %rdx,%rdx + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + movq 56(%rsi),%r15 + addq 0(%rsi),%r12 + adcq 8(%rsi),%r13 + adcq 16(%rsi),%r14 + adcq 24(%rsi),%r15 + sbbq $0x0,%rdx + movq %rax,64(%rsp) + movq %rdx,72(%rsp) + + + movq (%rcx),%rax + mulq %r12 + movq %rax,(%rsp) + movq %rdx,%r8 + + xorq %r9,%r9 + movq (%rcx),%rax + mulq %r13 + addq %rax,%r8 + adcq %rdx,%r9 + + xorq %r10,%r10 + movq 8(%rcx),%rax + mulq %r12 + addq %rax,%r8 + movq %r8,8(%rsp) + adcq %rdx,%r9 + adcq $0x0,%r10 + + xorq 
%r8,%r8 + movq (%rcx),%rax + mulq %r14 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0x0,%r8 + + movq 16(%rcx),%rax + mulq %r12 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0x0,%r8 + + movq 8(%rcx),%rax + mulq %r13 + addq %rax,%r9 + movq %r9,16(%rsp) + adcq %rdx,%r10 + adcq $0x0,%r8 + + xorq %r9,%r9 + movq (%rcx),%rax + mulq %r15 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0x0,%r9 + + movq 24(%rcx),%rax + mulq %r12 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0x0,%r9 + + movq 8(%rcx),%rax + mulq %r14 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0x0,%r9 + + movq 16(%rcx),%rax + mulq %r13 + addq %rax,%r10 + movq %r10,24(%rsp) + adcq %rdx,%r8 + adcq $0x0,%r9 + + xorq %r10,%r10 + movq 8(%rcx),%rax + mulq %r15 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0x0,%r10 + + movq 24(%rcx),%rax + mulq %r13 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0x0,%r10 + + movq 16(%rcx),%rax + mulq %r14 + addq %rax,%r8 + movq %r8,32(%rsp) + adcq %rdx,%r9 + adcq $0x0,%r10 + + xorq %r11,%r11 + movq 16(%rcx),%rax + mulq %r15 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0x0,%r11 + + movq 24(%rcx),%rax + mulq %r14 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0x0,%r11 + + movq 24(%rcx),%rax + mulq %r15 + addq %rax,%r10 + adcq %rdx,%r11 + + movq 64(%rsp),%rax + andq %rax,%r12 + andq %rax,%r13 + andq %rax,%r14 + andq %rax,%r15 + addq %r8,%r12 + adcq %r9,%r13 + adcq %r10,%r14 + adcq %r11,%r15 + + movq 72(%rsp),%rax + movq (%rcx),%r8 + movq 8(%rcx),%r9 + movq 16(%rcx),%r10 + movq 24(%rcx),%r11 + andq %rax,%r8 + andq %rax,%r9 + andq %rax,%r10 + andq %rax,%r11 + addq %r12,%r8 + adcq %r13,%r9 + adcq %r14,%r10 + adcq %r15,%r11 + movq %r8,32(%rsp) + movq %r9,40(%rsp) + movq %r10,48(%rsp) + movq %r11,56(%rsp) + + movq (%rdi),%r11 + movq (%rsi),%rax + mulq %r11 + xorq %r9,%r9 + movq %rax,(%rcx) + movq %rdx,%r8 + + movq 16(%rdi),%r14 + movq 8(%rsi),%rax + mulq %r11 + xorq %r10,%r10 + addq %rax,%r8 + adcq %rdx,%r9 + + movq 8(%rdi),%r12 + movq (%rsi),%rax + mulq %r12 + addq %rax,%r8 + movq %r8,8(%rcx) + adcq %rdx,%r9 + adcq $0x0,%r10 + + xorq 
%r8,%r8 + movq 16(%rsi),%rax + mulq %r11 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0x0,%r8 + + movq (%rsi),%r13 + movq %r14,%rax + mulq %r13 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0x0,%r8 + + movq 8(%rsi),%rax + mulq %r12 + addq %rax,%r9 + movq %r9,16(%rcx) + adcq %rdx,%r10 + adcq $0x0,%r8 + + xorq %r9,%r9 + movq 24(%rsi),%rax + mulq %r11 + movq 24(%rdi),%r15 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0x0,%r9 + + movq %r15,%rax + mulq %r13 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0x0,%r9 + + movq 16(%rsi),%rax + mulq %r12 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0x0,%r9 + + movq 8(%rsi),%rax + mulq %r14 + addq %rax,%r10 + movq %r10,24(%rcx) + adcq %rdx,%r8 + adcq $0x0,%r9 + + xorq %r10,%r10 + movq 24(%rsi),%rax + mulq %r12 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0x0,%r10 + + movq 8(%rsi),%rax + mulq %r15 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0x0,%r10 + + movq 16(%rsi),%rax + mulq %r14 + addq %rax,%r8 + movq %r8,32(%rcx) + adcq %rdx,%r9 + adcq $0x0,%r10 + + xorq %r8,%r8 + movq 24(%rsi),%rax + mulq %r14 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0x0,%r8 + + movq 16(%rsi),%rax + mulq %r15 + addq %rax,%r9 + movq %r9,40(%rcx) + adcq %rdx,%r10 + adcq $0x0,%r8 + + movq 24(%rsi),%rax + mulq %r15 + addq %rax,%r10 + movq %r10,48(%rcx) + adcq %rdx,%r8 + movq %r8,56(%rcx) + + + movq 32(%rdi),%r11 + movq 32(%rsi),%rax + mulq %r11 + xorq %r9,%r9 + movq %rax,64(%rcx) + movq %rdx,%r8 + + movq 48(%rdi),%r14 + movq 40(%rsi),%rax + mulq %r11 + xorq %r10,%r10 + addq %rax,%r8 + adcq %rdx,%r9 + + movq 40(%rdi),%r12 + movq 32(%rsi),%rax + mulq %r12 + addq %rax,%r8 + movq %r8,72(%rcx) + adcq %rdx,%r9 + adcq $0x0,%r10 + + xorq %r8,%r8 + movq 48(%rsi),%rax + mulq %r11 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0x0,%r8 + + movq 32(%rsi),%r13 + movq %r14,%rax + mulq %r13 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0x0,%r8 + + movq 40(%rsi),%rax + mulq %r12 + addq %rax,%r9 + movq %r9,80(%rcx) + adcq %rdx,%r10 + adcq $0x0,%r8 + + xorq %r9,%r9 + movq 56(%rsi),%rax + mulq %r11 + movq 56(%rdi),%r15 + 
addq %rax,%r10 + adcq %rdx,%r8 + adcq $0x0,%r9 + + movq %r15,%rax + mulq %r13 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0x0,%r9 + + movq 48(%rsi),%rax + mulq %r12 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0x0,%r9 + + movq 40(%rsi),%rax + mulq %r14 + addq %rax,%r10 + movq %r10,88(%rcx) + adcq %rdx,%r8 + adcq $0x0,%r9 + + xorq %r10,%r10 + movq 56(%rsi),%rax + mulq %r12 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0x0,%r10 + + movq 40(%rsi),%rax + mulq %r15 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0x0,%r10 + + movq 48(%rsi),%rax + mulq %r14 + addq %rax,%r8 + movq %r8,96(%rcx) + adcq %rdx,%r9 + adcq $0x0,%r10 + + xorq %r8,%r8 + movq 56(%rsi),%rax + mulq %r14 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0x0,%r8 + + movq 48(%rsi),%rax + mulq %r15 + addq %rax,%r9 + movq %r9,104(%rcx) + adcq %rdx,%r10 + adcq $0x0,%r8 + + movq 56(%rsi),%rax + mulq %r15 + addq %rax,%r10 + movq %r10,112(%rcx) + adcq %rdx,%r8 + movq %r8,120(%rcx) + + + movq 0(%rsp),%r8 + subq 0(%rcx),%r8 + movq 8(%rsp),%r9 + sbbq 8(%rcx),%r9 + movq 16(%rsp),%r10 + sbbq 16(%rcx),%r10 + movq 24(%rsp),%r11 + sbbq 24(%rcx),%r11 + movq 32(%rsp),%r12 + sbbq 32(%rcx),%r12 + movq 40(%rsp),%r13 + sbbq 40(%rcx),%r13 + movq 48(%rsp),%r14 + sbbq 48(%rcx),%r14 + movq 56(%rsp),%r15 + sbbq 56(%rcx),%r15 + + + movq 64(%rcx),%rax + subq %rax,%r8 + movq 72(%rcx),%rax + sbbq %rax,%r9 + movq 80(%rcx),%rax + sbbq %rax,%r10 + movq 88(%rcx),%rax + sbbq %rax,%r11 + movq 96(%rcx),%rax + sbbq %rax,%r12 + movq 104(%rcx),%rdx + sbbq %rdx,%r13 + movq 112(%rcx),%rdi + sbbq %rdi,%r14 + movq 120(%rcx),%rsi + sbbq %rsi,%r15 + + + addq 32(%rcx),%r8 + movq %r8,32(%rcx) + adcq 40(%rcx),%r9 + movq %r9,40(%rcx) + adcq 48(%rcx),%r10 + movq %r10,48(%rcx) + adcq 56(%rcx),%r11 + movq %r11,56(%rcx) + adcq 64(%rcx),%r12 + movq %r12,64(%rcx) + adcq 72(%rcx),%r13 + movq %r13,72(%rcx) + adcq 80(%rcx),%r14 + movq %r14,80(%rcx) + adcq 88(%rcx),%r15 + movq %r15,88(%rcx) + adcq $0x0,%rax + movq %rax,96(%rcx) + adcq $0x0,%rdx + movq %rdx,104(%rcx) + adcq $0x0,%rdi + movq 
%rdi,112(%rcx) + adcq $0x0,%rsi + movq %rsi,120(%rcx) + + addq $80,%rsp +.cfi_adjust_cfa_offset -80 + popq %r15 +.cfi_adjust_cfa_offset -8 + popq %r14 +.cfi_adjust_cfa_offset -8 + popq %r13 +.cfi_adjust_cfa_offset -8 + popq %r12 +.cfi_adjust_cfa_offset -8 + .byte 0xf3,0xc3 +.cfi_endproc + +.Lrdc_mulx_asm: +.cfi_startproc + +.cfi_adjust_cfa_offset 32 +.cfi_offset r12, -16 +.cfi_offset r13, -24 +.cfi_offset r14, -32 +.cfi_offset r15, -40 +.cfi_offset rbx, -48 +.cfi_adjust_cfa_offset 8 + + movq 0+0(%rdi),%rdx + mulxq 0+.Lp503p1_nz(%rip),%r8,%r9 + mulxq 8+.Lp503p1_nz(%rip),%r12,%r10 + + xorq %rax,%rax + mulxq 16+.Lp503p1_nz(%rip),%r13,%r11 + adoxq %r12,%r9 + adoxq %r13,%r10 + mulxq 24+.Lp503p1_nz(%rip),%rbx,%r12 + adoxq %rbx,%r11 + mulxq 32+.Lp503p1_nz(%rip),%r14,%r13 + adoxq %r14,%r12 + adoxq %rax,%r13 + + movq 0+8(%rdi),%rdx + mulxq 0+.Lp503p1_nz(%rip),%r14,%rbx + adcxq %r14,%r9 + adcxq %rbx,%r10 + mulxq 8+.Lp503p1_nz(%rip),%rcx,%r14 + adcxq %r14,%r11 + mulxq 16+.Lp503p1_nz(%rip),%rbx,%r15 + adcxq %r15,%r12 + mulxq 24+.Lp503p1_nz(%rip),%r15,%r14 + adcxq %r14,%r13 + mulxq 32+.Lp503p1_nz(%rip),%rdx,%r14 + adcxq %rax,%r14 + + xorq %rax,%rax + adoxq %rcx,%r10 + adoxq %rbx,%r11 + adoxq %r15,%r12 + adoxq %rdx,%r13 + adoxq %rax,%r14 + + + + xorq %r15,%r15 + addq 24(%rdi),%r8 + adcq 32(%rdi),%r9 + adcq 40(%rdi),%r10 + adcq 48(%rdi),%r11 + adcq 56(%rdi),%r12 + adcq 64(%rdi),%r13 + adcq 72(%rdi),%r14 + adcq 80(%rdi),%r15 + movq %r8,24(%rdi) + movq %r9,32(%rdi) + movq %r10,40(%rdi) + movq %r11,48(%rdi) + movq %r12,56(%rdi) + movq %r13,64(%rdi) + movq %r14,72(%rdi) + movq %r15,80(%rdi) + movq 88(%rdi),%r8 + movq 96(%rdi),%r9 + movq 104(%rdi),%r10 + movq 112(%rdi),%r11 + movq 120(%rdi),%r12 + adcq $0x0,%r8 + adcq $0x0,%r9 + adcq $0x0,%r10 + adcq $0x0,%r11 + adcq $0x0,%r12 + movq %r8,88(%rdi) + movq %r9,96(%rdi) + movq %r10,104(%rdi) + movq %r11,112(%rdi) + movq %r12,120(%rdi) + + movq 16+0(%rdi),%rdx + mulxq 0+.Lp503p1_nz(%rip),%r8,%r9 + mulxq 8+.Lp503p1_nz(%rip),%r12,%r10 + + 
xorq %rax,%rax + mulxq 16+.Lp503p1_nz(%rip),%r13,%r11 + adoxq %r12,%r9 + adoxq %r13,%r10 + mulxq 24+.Lp503p1_nz(%rip),%rbx,%r12 + adoxq %rbx,%r11 + mulxq 32+.Lp503p1_nz(%rip),%r14,%r13 + adoxq %r14,%r12 + adoxq %rax,%r13 + + movq 16+8(%rdi),%rdx + mulxq 0+.Lp503p1_nz(%rip),%r14,%rbx + adcxq %r14,%r9 + adcxq %rbx,%r10 + mulxq 8+.Lp503p1_nz(%rip),%rcx,%r14 + adcxq %r14,%r11 + mulxq 16+.Lp503p1_nz(%rip),%rbx,%r15 + adcxq %r15,%r12 + mulxq 24+.Lp503p1_nz(%rip),%r15,%r14 + adcxq %r14,%r13 + mulxq 32+.Lp503p1_nz(%rip),%rdx,%r14 + adcxq %rax,%r14 + + xorq %rax,%rax + adoxq %rcx,%r10 + adoxq %rbx,%r11 + adoxq %r15,%r12 + adoxq %rdx,%r13 + adoxq %rax,%r14 + + + + xorq %r15,%r15 + addq 40(%rdi),%r8 + adcq 48(%rdi),%r9 + adcq 56(%rdi),%r10 + adcq 64(%rdi),%r11 + adcq 72(%rdi),%r12 + adcq 80(%rdi),%r13 + adcq 88(%rdi),%r14 + adcq 96(%rdi),%r15 + movq %r8,40(%rdi) + movq %r9,48(%rdi) + movq %r10,56(%rdi) + movq %r11,64(%rdi) + movq %r12,72(%rdi) + movq %r13,80(%rdi) + movq %r14,88(%rdi) + movq %r15,96(%rdi) + movq 104(%rdi),%r8 + movq 112(%rdi),%r9 + movq 120(%rdi),%r10 + adcq $0x0,%r8 + adcq $0x0,%r9 + adcq $0x0,%r10 + movq %r8,104(%rdi) + movq %r9,112(%rdi) + movq %r10,120(%rdi) + + movq 32+0(%rdi),%rdx + mulxq 0+.Lp503p1_nz(%rip),%r8,%r9 + mulxq 8+.Lp503p1_nz(%rip),%r12,%r10 + + xorq %rax,%rax + mulxq 16+.Lp503p1_nz(%rip),%r13,%r11 + adoxq %r12,%r9 + adoxq %r13,%r10 + mulxq 24+.Lp503p1_nz(%rip),%rbx,%r12 + adoxq %rbx,%r11 + mulxq 32+.Lp503p1_nz(%rip),%r14,%r13 + adoxq %r14,%r12 + adoxq %rax,%r13 + + movq 32+8(%rdi),%rdx + mulxq 0+.Lp503p1_nz(%rip),%r14,%rbx + adcxq %r14,%r9 + adcxq %rbx,%r10 + mulxq 8+.Lp503p1_nz(%rip),%rcx,%r14 + adcxq %r14,%r11 + mulxq 16+.Lp503p1_nz(%rip),%rbx,%r15 + adcxq %r15,%r12 + mulxq 24+.Lp503p1_nz(%rip),%r15,%r14 + adcxq %r14,%r13 + mulxq 32+.Lp503p1_nz(%rip),%rdx,%r14 + adcxq %rax,%r14 + + xorq %rax,%rax + adoxq %rcx,%r10 + adoxq %rbx,%r11 + adoxq %r15,%r12 + adoxq %rdx,%r13 + adoxq %rax,%r14 + + + + xorq %r15,%r15 + xorq %rbx,%rbx + addq 
56(%rdi),%r8 + adcq 64(%rdi),%r9 + adcq 72(%rdi),%r10 + adcq 80(%rdi),%r11 + adcq 88(%rdi),%r12 + adcq 96(%rdi),%r13 + adcq 104(%rdi),%r14 + adcq 112(%rdi),%r15 + adcq 120(%rdi),%rbx + movq %r8,56(%rdi) + movq %r9,(%rsi) + movq %r10,72(%rdi) + movq %r11,80(%rdi) + movq %r12,88(%rdi) + movq %r13,96(%rdi) + movq %r14,104(%rdi) + movq %r15,112(%rdi) + movq %rbx,120(%rdi) + + movq 48+0(%rdi),%rdx + mulxq 0+.Lp503p1_nz(%rip),%r8,%r9 + mulxq 8+.Lp503p1_nz(%rip),%r12,%r10 + + xorq %rax,%rax + mulxq 16+.Lp503p1_nz(%rip),%r13,%r11 + adoxq %r12,%r9 + adoxq %r13,%r10 + mulxq 24+.Lp503p1_nz(%rip),%rbx,%r12 + adoxq %rbx,%r11 + mulxq 32+.Lp503p1_nz(%rip),%r14,%r13 + adoxq %r14,%r12 + adoxq %rax,%r13 + + movq 48+8(%rdi),%rdx + mulxq 0+.Lp503p1_nz(%rip),%r14,%rbx + adcxq %r14,%r9 + adcxq %rbx,%r10 + mulxq 8+.Lp503p1_nz(%rip),%rcx,%r14 + adcxq %r14,%r11 + mulxq 16+.Lp503p1_nz(%rip),%rbx,%r15 + adcxq %r15,%r12 + mulxq 24+.Lp503p1_nz(%rip),%r15,%r14 + adcxq %r14,%r13 + mulxq 32+.Lp503p1_nz(%rip),%rdx,%r14 + adcxq %rax,%r14 + + xorq %rax,%rax + adoxq %rcx,%r10 + adoxq %rbx,%r11 + adoxq %r15,%r12 + adoxq %rdx,%r13 + adoxq %rax,%r14 + + + + addq 72(%rdi),%r8 + adcq 80(%rdi),%r9 + adcq 88(%rdi),%r10 + adcq 96(%rdi),%r11 + adcq 104(%rdi),%r12 + adcq 112(%rdi),%r13 + adcq 120(%rdi),%r14 + movq %r8,8(%rsi) + movq %r9,16(%rsi) + movq %r10,24(%rsi) + movq %r11,32(%rsi) + movq %r12,40(%rsi) + movq %r13,48(%rsi) + movq %r14,56(%rsi) + + popq %rbx +.cfi_adjust_cfa_offset -8 +.cfi_same_value rbx + popq %r15 +.cfi_adjust_cfa_offset -8 +.cfi_same_value r15 + popq %r14 +.cfi_adjust_cfa_offset -8 +.cfi_same_value r14 + popq %r13 +.cfi_adjust_cfa_offset -8 +.cfi_same_value r13 + popq %r12 +.cfi_adjust_cfa_offset -8 +.cfi_same_value r12 + .byte 0xf3,0xc3 +.cfi_endproc +.globl sike_fprdc +.hidden sike_fprdc +.type sike_fprdc,@function +sike_fprdc: +.cfi_startproc + pushq %r12 +.cfi_adjust_cfa_offset 8 +.cfi_offset r12, -16 + pushq %r13 +.cfi_adjust_cfa_offset 8 +.cfi_offset r13, -24 + pushq %r14 
+.cfi_adjust_cfa_offset 8 +.cfi_offset r14, -32 + pushq %r15 +.cfi_adjust_cfa_offset 8 +.cfi_offset r15, -40 + pushq %rbx +.cfi_adjust_cfa_offset 8 +.cfi_offset rbx, -48 + + leaq OPENSSL_ia32cap_P(%rip),%rcx + movq 8(%rcx),%rcx + andl $0x80100,%ecx + cmpl $0x80100,%ecx + je .Lrdc_mulx_asm + + + + + leaq .Lp503p1(%rip),%rbx + + movq (%rdi),%r11 + movq (%rbx),%rax + mulq %r11 + xorq %r8,%r8 + addq 24(%rdi),%rax + movq %rax,24(%rsi) + adcq %rdx,%r8 + + xorq %r9,%r9 + movq 8(%rbx),%rax + mulq %r11 + xorq %r10,%r10 + addq %rax,%r8 + adcq %rdx,%r9 + + movq 8(%rdi),%r12 + movq (%rbx),%rax + mulq %r12 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + addq 32(%rdi),%r8 + movq %r8,32(%rsi) + adcq $0,%r9 + adcq $0,%r10 + + xorq %r8,%r8 + movq 16(%rbx),%rax + mulq %r11 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r8 + + movq 8(%rbx),%rax + mulq %r12 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r8 + + movq 16(%rdi),%r13 + movq (%rbx),%rax + mulq %r13 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r8 + addq 40(%rdi),%r9 + movq %r9,40(%rsi) + adcq $0,%r10 + adcq $0,%r8 + + xorq %r9,%r9 + movq 24(%rbx),%rax + mulq %r11 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%r9 + + movq 16(%rbx),%rax + mulq %r12 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%r9 + + movq 8(%rbx),%rax + mulq %r13 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%r9 + + movq 24(%rsi),%r14 + movq (%rbx),%rax + mulq %r14 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%r9 + addq 48(%rdi),%r10 + movq %r10,48(%rsi) + adcq $0,%r8 + adcq $0,%r9 + + xorq %r10,%r10 + movq 32(%rbx),%rax + mulq %r11 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + + movq 24(%rbx),%rax + mulq %r12 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + + movq 16(%rbx),%rax + mulq %r13 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + + movq 8(%rbx),%rax + mulq %r14 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + + movq 32(%rsi),%r15 + movq (%rbx),%rax + mulq %r15 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + addq 56(%rdi),%r8 + movq %r8,56(%rsi) + adcq $0,%r9 + 
adcq $0,%r10 + + xorq %r8,%r8 + movq 32(%rbx),%rax + mulq %r12 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r8 + + movq 24(%rbx),%rax + mulq %r13 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r8 + + movq 16(%rbx),%rax + mulq %r14 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r8 + + movq 8(%rbx),%rax + mulq %r15 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r8 + + movq 40(%rsi),%rcx + movq (%rbx),%rax + mulq %rcx + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r8 + addq 64(%rdi),%r9 + movq %r9,(%rsi) + adcq $0,%r10 + adcq $0,%r8 + + xorq %r9,%r9 + movq 32(%rbx),%rax + mulq %r13 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%r9 + + movq 24(%rbx),%rax + mulq %r14 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%r9 + + movq 16(%rbx),%rax + mulq %r15 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%r9 + + movq 8(%rbx),%rax + mulq %rcx + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%r9 + + movq 48(%rsi),%r13 + movq (%rbx),%rax + mulq %r13 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%r9 + addq 72(%rdi),%r10 + movq %r10,8(%rsi) + adcq $0,%r8 + adcq $0,%r9 + + xorq %r10,%r10 + movq 32(%rbx),%rax + mulq %r14 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + + movq 24(%rbx),%rax + mulq %r15 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + + movq 16(%rbx),%rax + mulq %rcx + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + + movq 8(%rbx),%rax + mulq %r13 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + + movq 56(%rsi),%r14 + movq (%rbx),%rax + mulq %r14 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + addq 80(%rdi),%r8 + movq %r8,16(%rsi) + adcq $0,%r9 + adcq $0,%r10 + + xorq %r8,%r8 + movq 32(%rbx),%rax + mulq %r15 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r8 + + movq 24(%rbx),%rax + mulq %rcx + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r8 + + movq 16(%rbx),%rax + mulq %r13 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r8 + + movq 8(%rbx),%rax + mulq %r14 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r8 + addq 88(%rdi),%r9 + movq %r9,24(%rsi) + adcq $0,%r10 + adcq $0,%r8 + + xorq %r9,%r9 + movq 32(%rbx),%rax + 
mulq %rcx + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%r9 + + movq 24(%rbx),%rax + mulq %r13 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%r9 + + movq 16(%rbx),%rax + mulq %r14 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%r9 + addq 96(%rdi),%r10 + movq %r10,32(%rsi) + adcq $0,%r8 + adcq $0,%r9 + + xorq %r10,%r10 + movq 32(%rbx),%rax + mulq %r13 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + + movq 24(%rbx),%rax + mulq %r14 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + addq 104(%rdi),%r8 + movq %r8,40(%rsi) + adcq $0,%r9 + adcq $0,%r10 + + movq 32(%rbx),%rax + mulq %r14 + addq %rax,%r9 + adcq %rdx,%r10 + addq 112(%rdi),%r9 + movq %r9,48(%rsi) + adcq $0,%r10 + addq 120(%rdi),%r10 + movq %r10,56(%rsi) + + popq %rbx +.cfi_adjust_cfa_offset -8 + popq %r15 +.cfi_adjust_cfa_offset -8 + popq %r14 +.cfi_adjust_cfa_offset -8 + popq %r13 +.cfi_adjust_cfa_offset -8 + popq %r12 +.cfi_adjust_cfa_offset -8 + .byte 0xf3,0xc3 +.cfi_endproc +#endif +#endif // defined(__x86_64__) && defined(__linux__) diff --git a/Sources/CNIOBoringSSL/crypto/third_party/sike/asm/fp-x86_64.mac.x86_64.S b/Sources/CNIOBoringSSL/crypto/third_party/sike/asm/fp-x86_64.mac.x86_64.S new file mode 100644 index 00000000..c9b811b9 --- /dev/null +++ b/Sources/CNIOBoringSSL/crypto/third_party/sike/asm/fp-x86_64.mac.x86_64.S @@ -0,0 +1,2088 @@ +#define BORINGSSL_PREFIX CNIOBoringSSL +#if defined(__x86_64__) && defined(__APPLE__) +# This file is generated from a similarly-named Perl script in the BoringSSL +# source tree. Do not edit by hand. 
+ +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +#define OPENSSL_NO_ASM +#endif +#endif + +#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) +#if defined(BORINGSSL_PREFIX) +#include +#endif +.text + + +L$p503x2: +.quad 0xFFFFFFFFFFFFFFFE +.quad 0xFFFFFFFFFFFFFFFF +.quad 0x57FFFFFFFFFFFFFF +.quad 0x2610B7B44423CF41 +.quad 0x3737ED90F6FCFB5E +.quad 0xC08B8D7BB4EF49A0 +.quad 0x0080CDEA83023C3C + + +L$p503p1: +.quad 0xAC00000000000000 +.quad 0x13085BDA2211E7A0 +.quad 0x1B9BF6C87B7E7DAF +.quad 0x6045C6BDDA77A4D0 +.quad 0x004066F541811E1E + +L$p503p1_nz: +.quad 0xAC00000000000000 +.quad 0x13085BDA2211E7A0 +.quad 0x1B9BF6C87B7E7DAF +.quad 0x6045C6BDDA77A4D0 +.quad 0x004066F541811E1E + + +.private_extern _OPENSSL_ia32cap_P + +.globl _sike_fpadd +.private_extern _sike_fpadd + +_sike_fpadd: + + pushq %r12 + + + pushq %r13 + + + pushq %r14 + + + pushq %r15 + + + + xorq %rax,%rax + + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 56(%rdi),%r15 + + addq 0(%rsi),%r8 + adcq 8(%rsi),%r9 + adcq 16(%rsi),%r10 + adcq 24(%rsi),%r11 + adcq 32(%rsi),%r12 + adcq 40(%rsi),%r13 + adcq 48(%rsi),%r14 + adcq 56(%rsi),%r15 + + movq L$p503x2(%rip),%rcx; + subq %rcx,%r8 + movq 8+L$p503x2(%rip),%rcx; + sbbq %rcx,%r9 + sbbq %rcx,%r10 + movq 16+L$p503x2(%rip),%rcx; + sbbq %rcx,%r11 + movq 24+L$p503x2(%rip),%rcx; + sbbq %rcx,%r12 + movq 32+L$p503x2(%rip),%rcx; + sbbq %rcx,%r13 + movq 40+L$p503x2(%rip),%rcx; + sbbq %rcx,%r14 + movq 48+L$p503x2(%rip),%rcx; + sbbq %rcx,%r15 + sbbq $0,%rax + + movq L$p503x2(%rip),%rdi + andq %rax,%rdi + movq 8+L$p503x2(%rip),%rsi + andq %rax,%rsi + movq 16+L$p503x2(%rip),%rcx + andq %rax,%rcx + + addq %rdi,%r8 + movq %r8,0(%rdx) + adcq %rsi,%r9 + movq %r9,8(%rdx) + adcq %rsi,%r10 + movq %r10,16(%rdx) + adcq %rcx,%r11 + movq %r11,24(%rdx) + + setc %cl + + movq 24+L$p503x2(%rip),%r8 + andq %rax,%r8 + movq 
32+L$p503x2(%rip),%r9 + andq %rax,%r9 + movq 40+L$p503x2(%rip),%r10 + andq %rax,%r10 + movq 48+L$p503x2(%rip),%r11 + andq %rax,%r11 + + btq $0,%rcx + + adcq %r8,%r12 + movq %r12,32(%rdx) + adcq %r9,%r13 + movq %r13,40(%rdx) + adcq %r10,%r14 + movq %r14,48(%rdx) + adcq %r11,%r15 + movq %r15,56(%rdx) + + popq %r15 + + popq %r14 + + popq %r13 + + popq %r12 + + .byte 0xf3,0xc3 + +.globl _sike_cswap_asm +.private_extern _sike_cswap_asm + +_sike_cswap_asm: + + + movq %rdx,%xmm3 + + + + + + pshufd $68,%xmm3,%xmm3 + + movdqu 0(%rdi),%xmm0 + movdqu 0(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,0(%rdi) + movdqu %xmm1,0(%rsi) + + movdqu 16(%rdi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,16(%rdi) + movdqu %xmm1,16(%rsi) + + movdqu 32(%rdi),%xmm0 + movdqu 32(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,32(%rdi) + movdqu %xmm1,32(%rsi) + + movdqu 48(%rdi),%xmm0 + movdqu 48(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,48(%rdi) + movdqu %xmm1,48(%rsi) + + movdqu 64(%rdi),%xmm0 + movdqu 64(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,64(%rdi) + movdqu %xmm1,64(%rsi) + + movdqu 80(%rdi),%xmm0 + movdqu 80(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,80(%rdi) + movdqu %xmm1,80(%rsi) + + movdqu 96(%rdi),%xmm0 + movdqu 96(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,96(%rdi) + movdqu %xmm1,96(%rsi) + + movdqu 112(%rdi),%xmm0 + movdqu 112(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor 
%xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,112(%rdi) + movdqu %xmm1,112(%rsi) + + movdqu 128(%rdi),%xmm0 + movdqu 128(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,128(%rdi) + movdqu %xmm1,128(%rsi) + + movdqu 144(%rdi),%xmm0 + movdqu 144(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,144(%rdi) + movdqu %xmm1,144(%rsi) + + movdqu 160(%rdi),%xmm0 + movdqu 160(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,160(%rdi) + movdqu %xmm1,160(%rsi) + + movdqu 176(%rdi),%xmm0 + movdqu 176(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,176(%rdi) + movdqu %xmm1,176(%rsi) + + movdqu 192(%rdi),%xmm0 + movdqu 192(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,192(%rdi) + movdqu %xmm1,192(%rsi) + + movdqu 208(%rdi),%xmm0 + movdqu 208(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,208(%rdi) + movdqu %xmm1,208(%rsi) + + movdqu 224(%rdi),%xmm0 + movdqu 224(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,224(%rdi) + movdqu %xmm1,224(%rsi) + + movdqu 240(%rdi),%xmm0 + movdqu 240(%rsi),%xmm1 + movdqa %xmm1,%xmm2 + pxor %xmm0,%xmm2 + pand %xmm3,%xmm2 + pxor %xmm2,%xmm0 + pxor %xmm2,%xmm1 + movdqu %xmm0,240(%rdi) + movdqu %xmm1,240(%rsi) + + .byte 0xf3,0xc3 +.globl _sike_fpsub +.private_extern _sike_fpsub + +_sike_fpsub: + + pushq %r12 + + + pushq %r13 + + + pushq %r14 + + + pushq %r15 + + + + xorq %rax,%rax + + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%r12 + movq 40(%rdi),%r13 + movq 48(%rdi),%r14 + movq 
56(%rdi),%r15 + + subq 0(%rsi),%r8 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + sbbq 24(%rsi),%r11 + sbbq 32(%rsi),%r12 + sbbq 40(%rsi),%r13 + sbbq 48(%rsi),%r14 + sbbq 56(%rsi),%r15 + sbbq $0x0,%rax + + movq L$p503x2(%rip),%rdi + andq %rax,%rdi + movq 8+L$p503x2(%rip),%rsi + andq %rax,%rsi + movq 16+L$p503x2(%rip),%rcx + andq %rax,%rcx + + addq %rdi,%r8 + adcq %rsi,%r9 + adcq %rsi,%r10 + adcq %rcx,%r11 + movq %r8,0(%rdx) + movq %r9,8(%rdx) + movq %r10,16(%rdx) + movq %r11,24(%rdx) + + setc %cl + + movq 24+L$p503x2(%rip),%r8 + andq %rax,%r8 + movq 32+L$p503x2(%rip),%r9 + andq %rax,%r9 + movq 40+L$p503x2(%rip),%r10 + andq %rax,%r10 + movq 48+L$p503x2(%rip),%r11 + andq %rax,%r11 + + btq $0x0,%rcx + + adcq %r8,%r12 + adcq %r9,%r13 + adcq %r10,%r14 + adcq %r11,%r15 + movq %r12,32(%rdx) + movq %r13,40(%rdx) + movq %r14,48(%rdx) + movq %r15,56(%rdx) + + popq %r15 + + popq %r14 + + popq %r13 + + popq %r12 + + .byte 0xf3,0xc3 + +.globl _sike_mpadd_asm +.private_extern _sike_mpadd_asm + +_sike_mpadd_asm: + + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + addq 0(%rsi),%r8 + adcq 8(%rsi),%r9 + adcq 16(%rsi),%r10 + adcq 24(%rsi),%r11 + movq %r8,0(%rdx) + movq %r9,8(%rdx) + movq %r10,16(%rdx) + movq %r11,24(%rdx) + + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + adcq 32(%rsi),%r8 + adcq 40(%rsi),%r9 + adcq 48(%rsi),%r10 + adcq 56(%rsi),%r11 + movq %r8,32(%rdx) + movq %r9,40(%rdx) + movq %r10,48(%rdx) + movq %r11,56(%rdx) + .byte 0xf3,0xc3 + +.globl _sike_mpsubx2_asm +.private_extern _sike_mpsubx2_asm + +_sike_mpsubx2_asm: + + xorq %rax,%rax + + movq 0(%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + movq 32(%rdi),%rcx + subq 0(%rsi),%r8 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + sbbq 24(%rsi),%r11 + sbbq 32(%rsi),%rcx + movq %r8,0(%rdx) + movq %r9,8(%rdx) + movq %r10,16(%rdx) + movq %r11,24(%rdx) + movq %rcx,32(%rdx) + + movq 40(%rdi),%r8 + movq 48(%rdi),%r9 + movq 56(%rdi),%r10 + movq 
64(%rdi),%r11 + movq 72(%rdi),%rcx + sbbq 40(%rsi),%r8 + sbbq 48(%rsi),%r9 + sbbq 56(%rsi),%r10 + sbbq 64(%rsi),%r11 + sbbq 72(%rsi),%rcx + movq %r8,40(%rdx) + movq %r9,48(%rdx) + movq %r10,56(%rdx) + movq %r11,64(%rdx) + movq %rcx,72(%rdx) + + movq 80(%rdi),%r8 + movq 88(%rdi),%r9 + movq 96(%rdi),%r10 + movq 104(%rdi),%r11 + movq 112(%rdi),%rcx + sbbq 80(%rsi),%r8 + sbbq 88(%rsi),%r9 + sbbq 96(%rsi),%r10 + sbbq 104(%rsi),%r11 + sbbq 112(%rsi),%rcx + movq %r8,80(%rdx) + movq %r9,88(%rdx) + movq %r10,96(%rdx) + movq %r11,104(%rdx) + movq %rcx,112(%rdx) + + movq 120(%rdi),%r8 + sbbq 120(%rsi),%r8 + sbbq $0x0,%rax + movq %r8,120(%rdx) + .byte 0xf3,0xc3 + +.globl _sike_mpdblsubx2_asm +.private_extern _sike_mpdblsubx2_asm + +_sike_mpdblsubx2_asm: + + pushq %r12 + + + pushq %r13 + + + pushq %r14 + + + + xorq %rax,%rax + + movq 0(%rdx),%r8 + movq 8(%rdx),%r9 + movq 16(%rdx),%r10 + movq 24(%rdx),%r11 + movq 32(%rdx),%r12 + movq 40(%rdx),%r13 + movq 48(%rdx),%r14 + movq 56(%rdx),%rcx + subq 0(%rdi),%r8 + sbbq 8(%rdi),%r9 + sbbq 16(%rdi),%r10 + sbbq 24(%rdi),%r11 + sbbq 32(%rdi),%r12 + sbbq 40(%rdi),%r13 + sbbq 48(%rdi),%r14 + sbbq 56(%rdi),%rcx + adcq $0x0,%rax + + subq 0(%rsi),%r8 + sbbq 8(%rsi),%r9 + sbbq 16(%rsi),%r10 + sbbq 24(%rsi),%r11 + sbbq 32(%rsi),%r12 + sbbq 40(%rsi),%r13 + sbbq 48(%rsi),%r14 + sbbq 56(%rsi),%rcx + adcq $0x0,%rax + + movq %r8,0(%rdx) + movq %r9,8(%rdx) + movq %r10,16(%rdx) + movq %r11,24(%rdx) + movq %r12,32(%rdx) + movq %r13,40(%rdx) + movq %r14,48(%rdx) + movq %rcx,56(%rdx) + + movq 64(%rdx),%r8 + movq 72(%rdx),%r9 + movq 80(%rdx),%r10 + movq 88(%rdx),%r11 + movq 96(%rdx),%r12 + movq 104(%rdx),%r13 + movq 112(%rdx),%r14 + movq 120(%rdx),%rcx + + subq %rax,%r8 + sbbq 64(%rdi),%r8 + sbbq 72(%rdi),%r9 + sbbq 80(%rdi),%r10 + sbbq 88(%rdi),%r11 + sbbq 96(%rdi),%r12 + sbbq 104(%rdi),%r13 + sbbq 112(%rdi),%r14 + sbbq 120(%rdi),%rcx + subq 64(%rsi),%r8 + sbbq 72(%rsi),%r9 + sbbq 80(%rsi),%r10 + sbbq 88(%rsi),%r11 + sbbq 96(%rsi),%r12 + sbbq 
104(%rsi),%r13 + sbbq 112(%rsi),%r14 + sbbq 120(%rsi),%rcx + + movq %r8,64(%rdx) + movq %r9,72(%rdx) + movq %r10,80(%rdx) + movq %r11,88(%rdx) + movq %r12,96(%rdx) + movq %r13,104(%rdx) + movq %r14,112(%rdx) + movq %rcx,120(%rdx) + + popq %r14 + + popq %r13 + + popq %r12 + + .byte 0xf3,0xc3 + + +L$mul_mulx: + + + + + + + + + movq %rdx,%rcx + + + xorq %rax,%rax + movq (%rdi),%r8 + movq 8(%rdi),%r9 + movq 16(%rdi),%r10 + movq 24(%rdi),%r11 + pushq %rbx + + + + pushq %rbp + + + subq $96,%rsp + + addq 32(%rdi),%r8 + adcq 40(%rdi),%r9 + adcq 48(%rdi),%r10 + adcq 56(%rdi),%r11 + sbbq $0x0,%rax + movq %r8,(%rsp) + movq %r9,8(%rsp) + movq %r10,16(%rsp) + movq %r11,24(%rsp) + + + xorq %rbx,%rbx + movq (%rsi),%r12 + movq 8(%rsi),%r13 + movq 16(%rsi),%r14 + movq 24(%rsi),%r15 + addq 32(%rsi),%r12 + adcq 40(%rsi),%r13 + adcq 48(%rsi),%r14 + adcq 56(%rsi),%r15 + sbbq $0x0,%rbx + movq %r12,32(%rsp) + movq %r13,40(%rsp) + movq %r14,48(%rsp) + movq %r15,56(%rsp) + + + andq %rax,%r12 + andq %rax,%r13 + andq %rax,%r14 + andq %rax,%r15 + + + andq %rbx,%r8 + andq %rbx,%r9 + andq %rbx,%r10 + andq %rbx,%r11 + + + addq %r12,%r8 + adcq %r13,%r9 + adcq %r14,%r10 + adcq %r15,%r11 + movq %r8,64(%rsp) + movq %r9,72(%rsp) + movq %r10,80(%rsp) + movq %r11,88(%rsp) + + + movq 0+0(%rsp),%rdx + mulxq 32+0(%rsp),%r9,%r8 + movq %r9,64+0(%rcx) + mulxq 32+8(%rsp),%r10,%r9 + xorq %rax,%rax + adoxq %r10,%r8 + mulxq 32+16(%rsp),%r11,%r10 + adoxq %r11,%r9 + mulxq 32+24(%rsp),%r12,%r11 + adoxq %r12,%r10 + + movq 0+8(%rsp),%rdx + mulxq 32+0(%rsp),%r12,%r13 + adoxq %rax,%r11 + xorq %rax,%rax + mulxq 32+8(%rsp),%r15,%r14 + adoxq %r8,%r12 + movq %r12,64+8(%rcx) + adcxq %r15,%r13 + mulxq 32+16(%rsp),%rbx,%r15 + adcxq %rbx,%r14 + adoxq %r9,%r13 + mulxq 32+24(%rsp),%rbp,%rbx + adcxq %rbp,%r15 + adcxq %rax,%rbx + adoxq %r10,%r14 + + movq 0+16(%rsp),%rdx + mulxq 32+0(%rsp),%r8,%r9 + adoxq %r11,%r15 + adoxq %rax,%rbx + xorq %rax,%rax + mulxq 32+8(%rsp),%r11,%r10 + adoxq %r13,%r8 + movq %r8,64+16(%rcx) + adcxq 
%r11,%r9 + mulxq 32+16(%rsp),%r12,%r11 + adcxq %r12,%r10 + adoxq %r14,%r9 + mulxq 32+24(%rsp),%rbp,%r12 + adcxq %rbp,%r11 + + adcxq %rax,%r12 + adoxq %r15,%r10 + adoxq %rbx,%r11 + adoxq %rax,%r12 + + movq 0+24(%rsp),%rdx + mulxq 32+0(%rsp),%r8,%r13 + xorq %rax,%rax + mulxq 32+8(%rsp),%r15,%r14 + adcxq %r15,%r13 + adoxq %r8,%r9 + mulxq 32+16(%rsp),%rbx,%r15 + adcxq %rbx,%r14 + adoxq %r13,%r10 + mulxq 32+24(%rsp),%rbp,%rbx + adcxq %rbp,%r15 + adcxq %rax,%rbx + adoxq %r14,%r11 + adoxq %r15,%r12 + adoxq %rax,%rbx + movq %r9,64+24(%rcx) + movq %r10,64+32(%rcx) + movq %r11,64+40(%rcx) + movq %r12,64+48(%rcx) + movq %rbx,64+56(%rcx) + + + + movq 0+0(%rdi),%rdx + mulxq 0+0(%rsi),%r9,%r8 + movq %r9,0+0(%rcx) + mulxq 0+8(%rsi),%r10,%r9 + xorq %rax,%rax + adoxq %r10,%r8 + mulxq 0+16(%rsi),%r11,%r10 + adoxq %r11,%r9 + mulxq 0+24(%rsi),%r12,%r11 + adoxq %r12,%r10 + + movq 0+8(%rdi),%rdx + mulxq 0+0(%rsi),%r12,%r13 + adoxq %rax,%r11 + xorq %rax,%rax + mulxq 0+8(%rsi),%r15,%r14 + adoxq %r8,%r12 + movq %r12,0+8(%rcx) + adcxq %r15,%r13 + mulxq 0+16(%rsi),%rbx,%r15 + adcxq %rbx,%r14 + adoxq %r9,%r13 + mulxq 0+24(%rsi),%rbp,%rbx + adcxq %rbp,%r15 + adcxq %rax,%rbx + adoxq %r10,%r14 + + movq 0+16(%rdi),%rdx + mulxq 0+0(%rsi),%r8,%r9 + adoxq %r11,%r15 + adoxq %rax,%rbx + xorq %rax,%rax + mulxq 0+8(%rsi),%r11,%r10 + adoxq %r13,%r8 + movq %r8,0+16(%rcx) + adcxq %r11,%r9 + mulxq 0+16(%rsi),%r12,%r11 + adcxq %r12,%r10 + adoxq %r14,%r9 + mulxq 0+24(%rsi),%rbp,%r12 + adcxq %rbp,%r11 + + adcxq %rax,%r12 + adoxq %r15,%r10 + adoxq %rbx,%r11 + adoxq %rax,%r12 + + movq 0+24(%rdi),%rdx + mulxq 0+0(%rsi),%r8,%r13 + xorq %rax,%rax + mulxq 0+8(%rsi),%r15,%r14 + adcxq %r15,%r13 + adoxq %r8,%r9 + mulxq 0+16(%rsi),%rbx,%r15 + adcxq %rbx,%r14 + adoxq %r13,%r10 + mulxq 0+24(%rsi),%rbp,%rbx + adcxq %rbp,%r15 + adcxq %rax,%rbx + adoxq %r14,%r11 + adoxq %r15,%r12 + adoxq %rax,%rbx + movq %r9,0+24(%rcx) + movq %r10,0+32(%rcx) + movq %r11,0+40(%rcx) + movq %r12,0+48(%rcx) + movq %rbx,0+56(%rcx) + + + + movq 
32+0(%rdi),%rdx + mulxq 32+0(%rsi),%r9,%r8 + movq %r9,0+0(%rsp) + mulxq 32+8(%rsi),%r10,%r9 + xorq %rax,%rax + adoxq %r10,%r8 + mulxq 32+16(%rsi),%r11,%r10 + adoxq %r11,%r9 + mulxq 32+24(%rsi),%r12,%r11 + adoxq %r12,%r10 + + movq 32+8(%rdi),%rdx + mulxq 32+0(%rsi),%r12,%r13 + adoxq %rax,%r11 + xorq %rax,%rax + mulxq 32+8(%rsi),%r15,%r14 + adoxq %r8,%r12 + movq %r12,0+8(%rsp) + adcxq %r15,%r13 + mulxq 32+16(%rsi),%rbx,%r15 + adcxq %rbx,%r14 + adoxq %r9,%r13 + mulxq 32+24(%rsi),%rbp,%rbx + adcxq %rbp,%r15 + adcxq %rax,%rbx + adoxq %r10,%r14 + + movq 32+16(%rdi),%rdx + mulxq 32+0(%rsi),%r8,%r9 + adoxq %r11,%r15 + adoxq %rax,%rbx + xorq %rax,%rax + mulxq 32+8(%rsi),%r11,%r10 + adoxq %r13,%r8 + movq %r8,0+16(%rsp) + adcxq %r11,%r9 + mulxq 32+16(%rsi),%r12,%r11 + adcxq %r12,%r10 + adoxq %r14,%r9 + mulxq 32+24(%rsi),%rbp,%r12 + adcxq %rbp,%r11 + + adcxq %rax,%r12 + adoxq %r15,%r10 + adoxq %rbx,%r11 + adoxq %rax,%r12 + + movq 32+24(%rdi),%rdx + mulxq 32+0(%rsi),%r8,%r13 + xorq %rax,%rax + mulxq 32+8(%rsi),%r15,%r14 + adcxq %r15,%r13 + adoxq %r8,%r9 + mulxq 32+16(%rsi),%rbx,%r15 + adcxq %rbx,%r14 + adoxq %r13,%r10 + mulxq 32+24(%rsi),%rbp,%rbx + adcxq %rbp,%r15 + adcxq %rax,%rbx + adoxq %r14,%r11 + adoxq %r15,%r12 + adoxq %rax,%rbx + movq %r9,0+24(%rsp) + movq %r10,0+32(%rsp) + movq %r11,0+40(%rsp) + movq %r12,0+48(%rsp) + movq %rbx,0+56(%rsp) + + + + + movq 64(%rsp),%r8 + movq 72(%rsp),%r9 + movq 80(%rsp),%r10 + movq 88(%rsp),%r11 + movq 96(%rcx),%rax + addq %rax,%r8 + movq 104(%rcx),%rax + adcq %rax,%r9 + movq 112(%rcx),%rax + adcq %rax,%r10 + movq 120(%rcx),%rax + adcq %rax,%r11 + + + movq 64(%rcx),%r12 + movq 72(%rcx),%r13 + movq 80(%rcx),%r14 + movq 88(%rcx),%r15 + subq (%rcx),%r12 + sbbq 8(%rcx),%r13 + sbbq 16(%rcx),%r14 + sbbq 24(%rcx),%r15 + sbbq 32(%rcx),%r8 + sbbq 40(%rcx),%r9 + sbbq 48(%rcx),%r10 + sbbq 56(%rcx),%r11 + + + subq (%rsp),%r12 + sbbq 8(%rsp),%r13 + sbbq 16(%rsp),%r14 + sbbq 24(%rsp),%r15 + sbbq 32(%rsp),%r8 + sbbq 40(%rsp),%r9 + sbbq 48(%rsp),%r10 + 
sbbq 56(%rsp),%r11 + + addq 32(%rcx),%r12 + movq %r12,32(%rcx) + adcq 40(%rcx),%r13 + movq %r13,40(%rcx) + adcq 48(%rcx),%r14 + movq %r14,48(%rcx) + adcq 56(%rcx),%r15 + movq %r15,56(%rcx) + movq (%rsp),%rax + adcq %rax,%r8 + movq %r8,64(%rcx) + movq 8(%rsp),%rax + adcq %rax,%r9 + movq %r9,72(%rcx) + movq 16(%rsp),%rax + adcq %rax,%r10 + movq %r10,80(%rcx) + movq 24(%rsp),%rax + adcq %rax,%r11 + movq %r11,88(%rcx) + movq 32(%rsp),%r12 + adcq $0x0,%r12 + movq %r12,96(%rcx) + movq 40(%rsp),%r13 + adcq $0x0,%r13 + movq %r13,104(%rcx) + movq 48(%rsp),%r14 + adcq $0x0,%r14 + movq %r14,112(%rcx) + movq 56(%rsp),%r15 + adcq $0x0,%r15 + movq %r15,120(%rcx) + + addq $96,%rsp + + popq %rbp + + + popq %rbx + + + popq %r15 + + + popq %r14 + + + popq %r13 + + + popq %r12 + + + .byte 0xf3,0xc3 + + +.globl _sike_mpmul +.private_extern _sike_mpmul + +_sike_mpmul: + + pushq %r12 + + + pushq %r13 + + + pushq %r14 + + + pushq %r15 + + + + leaq _OPENSSL_ia32cap_P(%rip),%rcx + movq 8(%rcx),%rcx + andl $0x80100,%ecx + cmpl $0x80100,%ecx + je L$mul_mulx + + + + movq %rdx,%rcx + + + xorq %rax,%rax + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + addq 0(%rdi),%r8 + adcq 8(%rdi),%r9 + adcq 16(%rdi),%r10 + adcq 24(%rdi),%r11 + movq %r8,0(%rcx) + movq %r9,8(%rcx) + movq %r10,16(%rcx) + movq %r11,24(%rcx) + sbbq $0,%rax + subq $80,%rsp + + + + xorq %rdx,%rdx + movq 32(%rsi),%r12 + movq 40(%rsi),%r13 + movq 48(%rsi),%r14 + movq 56(%rsi),%r15 + addq 0(%rsi),%r12 + adcq 8(%rsi),%r13 + adcq 16(%rsi),%r14 + adcq 24(%rsi),%r15 + sbbq $0x0,%rdx + movq %rax,64(%rsp) + movq %rdx,72(%rsp) + + + movq (%rcx),%rax + mulq %r12 + movq %rax,(%rsp) + movq %rdx,%r8 + + xorq %r9,%r9 + movq (%rcx),%rax + mulq %r13 + addq %rax,%r8 + adcq %rdx,%r9 + + xorq %r10,%r10 + movq 8(%rcx),%rax + mulq %r12 + addq %rax,%r8 + movq %r8,8(%rsp) + adcq %rdx,%r9 + adcq $0x0,%r10 + + xorq %r8,%r8 + movq (%rcx),%rax + mulq %r14 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0x0,%r8 + + movq 
16(%rcx),%rax + mulq %r12 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0x0,%r8 + + movq 8(%rcx),%rax + mulq %r13 + addq %rax,%r9 + movq %r9,16(%rsp) + adcq %rdx,%r10 + adcq $0x0,%r8 + + xorq %r9,%r9 + movq (%rcx),%rax + mulq %r15 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0x0,%r9 + + movq 24(%rcx),%rax + mulq %r12 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0x0,%r9 + + movq 8(%rcx),%rax + mulq %r14 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0x0,%r9 + + movq 16(%rcx),%rax + mulq %r13 + addq %rax,%r10 + movq %r10,24(%rsp) + adcq %rdx,%r8 + adcq $0x0,%r9 + + xorq %r10,%r10 + movq 8(%rcx),%rax + mulq %r15 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0x0,%r10 + + movq 24(%rcx),%rax + mulq %r13 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0x0,%r10 + + movq 16(%rcx),%rax + mulq %r14 + addq %rax,%r8 + movq %r8,32(%rsp) + adcq %rdx,%r9 + adcq $0x0,%r10 + + xorq %r11,%r11 + movq 16(%rcx),%rax + mulq %r15 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0x0,%r11 + + movq 24(%rcx),%rax + mulq %r14 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0x0,%r11 + + movq 24(%rcx),%rax + mulq %r15 + addq %rax,%r10 + adcq %rdx,%r11 + + movq 64(%rsp),%rax + andq %rax,%r12 + andq %rax,%r13 + andq %rax,%r14 + andq %rax,%r15 + addq %r8,%r12 + adcq %r9,%r13 + adcq %r10,%r14 + adcq %r11,%r15 + + movq 72(%rsp),%rax + movq (%rcx),%r8 + movq 8(%rcx),%r9 + movq 16(%rcx),%r10 + movq 24(%rcx),%r11 + andq %rax,%r8 + andq %rax,%r9 + andq %rax,%r10 + andq %rax,%r11 + addq %r12,%r8 + adcq %r13,%r9 + adcq %r14,%r10 + adcq %r15,%r11 + movq %r8,32(%rsp) + movq %r9,40(%rsp) + movq %r10,48(%rsp) + movq %r11,56(%rsp) + + movq (%rdi),%r11 + movq (%rsi),%rax + mulq %r11 + xorq %r9,%r9 + movq %rax,(%rcx) + movq %rdx,%r8 + + movq 16(%rdi),%r14 + movq 8(%rsi),%rax + mulq %r11 + xorq %r10,%r10 + addq %rax,%r8 + adcq %rdx,%r9 + + movq 8(%rdi),%r12 + movq (%rsi),%rax + mulq %r12 + addq %rax,%r8 + movq %r8,8(%rcx) + adcq %rdx,%r9 + adcq $0x0,%r10 + + xorq %r8,%r8 + movq 16(%rsi),%rax + mulq %r11 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0x0,%r8 + + movq 
(%rsi),%r13 + movq %r14,%rax + mulq %r13 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0x0,%r8 + + movq 8(%rsi),%rax + mulq %r12 + addq %rax,%r9 + movq %r9,16(%rcx) + adcq %rdx,%r10 + adcq $0x0,%r8 + + xorq %r9,%r9 + movq 24(%rsi),%rax + mulq %r11 + movq 24(%rdi),%r15 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0x0,%r9 + + movq %r15,%rax + mulq %r13 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0x0,%r9 + + movq 16(%rsi),%rax + mulq %r12 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0x0,%r9 + + movq 8(%rsi),%rax + mulq %r14 + addq %rax,%r10 + movq %r10,24(%rcx) + adcq %rdx,%r8 + adcq $0x0,%r9 + + xorq %r10,%r10 + movq 24(%rsi),%rax + mulq %r12 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0x0,%r10 + + movq 8(%rsi),%rax + mulq %r15 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0x0,%r10 + + movq 16(%rsi),%rax + mulq %r14 + addq %rax,%r8 + movq %r8,32(%rcx) + adcq %rdx,%r9 + adcq $0x0,%r10 + + xorq %r8,%r8 + movq 24(%rsi),%rax + mulq %r14 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0x0,%r8 + + movq 16(%rsi),%rax + mulq %r15 + addq %rax,%r9 + movq %r9,40(%rcx) + adcq %rdx,%r10 + adcq $0x0,%r8 + + movq 24(%rsi),%rax + mulq %r15 + addq %rax,%r10 + movq %r10,48(%rcx) + adcq %rdx,%r8 + movq %r8,56(%rcx) + + + movq 32(%rdi),%r11 + movq 32(%rsi),%rax + mulq %r11 + xorq %r9,%r9 + movq %rax,64(%rcx) + movq %rdx,%r8 + + movq 48(%rdi),%r14 + movq 40(%rsi),%rax + mulq %r11 + xorq %r10,%r10 + addq %rax,%r8 + adcq %rdx,%r9 + + movq 40(%rdi),%r12 + movq 32(%rsi),%rax + mulq %r12 + addq %rax,%r8 + movq %r8,72(%rcx) + adcq %rdx,%r9 + adcq $0x0,%r10 + + xorq %r8,%r8 + movq 48(%rsi),%rax + mulq %r11 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0x0,%r8 + + movq 32(%rsi),%r13 + movq %r14,%rax + mulq %r13 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0x0,%r8 + + movq 40(%rsi),%rax + mulq %r12 + addq %rax,%r9 + movq %r9,80(%rcx) + adcq %rdx,%r10 + adcq $0x0,%r8 + + xorq %r9,%r9 + movq 56(%rsi),%rax + mulq %r11 + movq 56(%rdi),%r15 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0x0,%r9 + + movq %r15,%rax + mulq %r13 + addq %rax,%r10 + adcq 
%rdx,%r8 + adcq $0x0,%r9 + + movq 48(%rsi),%rax + mulq %r12 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0x0,%r9 + + movq 40(%rsi),%rax + mulq %r14 + addq %rax,%r10 + movq %r10,88(%rcx) + adcq %rdx,%r8 + adcq $0x0,%r9 + + xorq %r10,%r10 + movq 56(%rsi),%rax + mulq %r12 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0x0,%r10 + + movq 40(%rsi),%rax + mulq %r15 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0x0,%r10 + + movq 48(%rsi),%rax + mulq %r14 + addq %rax,%r8 + movq %r8,96(%rcx) + adcq %rdx,%r9 + adcq $0x0,%r10 + + xorq %r8,%r8 + movq 56(%rsi),%rax + mulq %r14 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0x0,%r8 + + movq 48(%rsi),%rax + mulq %r15 + addq %rax,%r9 + movq %r9,104(%rcx) + adcq %rdx,%r10 + adcq $0x0,%r8 + + movq 56(%rsi),%rax + mulq %r15 + addq %rax,%r10 + movq %r10,112(%rcx) + adcq %rdx,%r8 + movq %r8,120(%rcx) + + + movq 0(%rsp),%r8 + subq 0(%rcx),%r8 + movq 8(%rsp),%r9 + sbbq 8(%rcx),%r9 + movq 16(%rsp),%r10 + sbbq 16(%rcx),%r10 + movq 24(%rsp),%r11 + sbbq 24(%rcx),%r11 + movq 32(%rsp),%r12 + sbbq 32(%rcx),%r12 + movq 40(%rsp),%r13 + sbbq 40(%rcx),%r13 + movq 48(%rsp),%r14 + sbbq 48(%rcx),%r14 + movq 56(%rsp),%r15 + sbbq 56(%rcx),%r15 + + + movq 64(%rcx),%rax + subq %rax,%r8 + movq 72(%rcx),%rax + sbbq %rax,%r9 + movq 80(%rcx),%rax + sbbq %rax,%r10 + movq 88(%rcx),%rax + sbbq %rax,%r11 + movq 96(%rcx),%rax + sbbq %rax,%r12 + movq 104(%rcx),%rdx + sbbq %rdx,%r13 + movq 112(%rcx),%rdi + sbbq %rdi,%r14 + movq 120(%rcx),%rsi + sbbq %rsi,%r15 + + + addq 32(%rcx),%r8 + movq %r8,32(%rcx) + adcq 40(%rcx),%r9 + movq %r9,40(%rcx) + adcq 48(%rcx),%r10 + movq %r10,48(%rcx) + adcq 56(%rcx),%r11 + movq %r11,56(%rcx) + adcq 64(%rcx),%r12 + movq %r12,64(%rcx) + adcq 72(%rcx),%r13 + movq %r13,72(%rcx) + adcq 80(%rcx),%r14 + movq %r14,80(%rcx) + adcq 88(%rcx),%r15 + movq %r15,88(%rcx) + adcq $0x0,%rax + movq %rax,96(%rcx) + adcq $0x0,%rdx + movq %rdx,104(%rcx) + adcq $0x0,%rdi + movq %rdi,112(%rcx) + adcq $0x0,%rsi + movq %rsi,120(%rcx) + + addq $80,%rsp + + popq %r15 + + popq %r14 + + 
popq %r13 + + popq %r12 + + .byte 0xf3,0xc3 + + +L$rdc_mulx_asm: + + + + + + + + + + + movq 0+0(%rdi),%rdx + mulxq 0+L$p503p1_nz(%rip),%r8,%r9 + mulxq 8+L$p503p1_nz(%rip),%r12,%r10 + + xorq %rax,%rax + mulxq 16+L$p503p1_nz(%rip),%r13,%r11 + adoxq %r12,%r9 + adoxq %r13,%r10 + mulxq 24+L$p503p1_nz(%rip),%rbx,%r12 + adoxq %rbx,%r11 + mulxq 32+L$p503p1_nz(%rip),%r14,%r13 + adoxq %r14,%r12 + adoxq %rax,%r13 + + movq 0+8(%rdi),%rdx + mulxq 0+L$p503p1_nz(%rip),%r14,%rbx + adcxq %r14,%r9 + adcxq %rbx,%r10 + mulxq 8+L$p503p1_nz(%rip),%rcx,%r14 + adcxq %r14,%r11 + mulxq 16+L$p503p1_nz(%rip),%rbx,%r15 + adcxq %r15,%r12 + mulxq 24+L$p503p1_nz(%rip),%r15,%r14 + adcxq %r14,%r13 + mulxq 32+L$p503p1_nz(%rip),%rdx,%r14 + adcxq %rax,%r14 + + xorq %rax,%rax + adoxq %rcx,%r10 + adoxq %rbx,%r11 + adoxq %r15,%r12 + adoxq %rdx,%r13 + adoxq %rax,%r14 + + + + xorq %r15,%r15 + addq 24(%rdi),%r8 + adcq 32(%rdi),%r9 + adcq 40(%rdi),%r10 + adcq 48(%rdi),%r11 + adcq 56(%rdi),%r12 + adcq 64(%rdi),%r13 + adcq 72(%rdi),%r14 + adcq 80(%rdi),%r15 + movq %r8,24(%rdi) + movq %r9,32(%rdi) + movq %r10,40(%rdi) + movq %r11,48(%rdi) + movq %r12,56(%rdi) + movq %r13,64(%rdi) + movq %r14,72(%rdi) + movq %r15,80(%rdi) + movq 88(%rdi),%r8 + movq 96(%rdi),%r9 + movq 104(%rdi),%r10 + movq 112(%rdi),%r11 + movq 120(%rdi),%r12 + adcq $0x0,%r8 + adcq $0x0,%r9 + adcq $0x0,%r10 + adcq $0x0,%r11 + adcq $0x0,%r12 + movq %r8,88(%rdi) + movq %r9,96(%rdi) + movq %r10,104(%rdi) + movq %r11,112(%rdi) + movq %r12,120(%rdi) + + movq 16+0(%rdi),%rdx + mulxq 0+L$p503p1_nz(%rip),%r8,%r9 + mulxq 8+L$p503p1_nz(%rip),%r12,%r10 + + xorq %rax,%rax + mulxq 16+L$p503p1_nz(%rip),%r13,%r11 + adoxq %r12,%r9 + adoxq %r13,%r10 + mulxq 24+L$p503p1_nz(%rip),%rbx,%r12 + adoxq %rbx,%r11 + mulxq 32+L$p503p1_nz(%rip),%r14,%r13 + adoxq %r14,%r12 + adoxq %rax,%r13 + + movq 16+8(%rdi),%rdx + mulxq 0+L$p503p1_nz(%rip),%r14,%rbx + adcxq %r14,%r9 + adcxq %rbx,%r10 + mulxq 8+L$p503p1_nz(%rip),%rcx,%r14 + adcxq %r14,%r11 + mulxq 
16+L$p503p1_nz(%rip),%rbx,%r15 + adcxq %r15,%r12 + mulxq 24+L$p503p1_nz(%rip),%r15,%r14 + adcxq %r14,%r13 + mulxq 32+L$p503p1_nz(%rip),%rdx,%r14 + adcxq %rax,%r14 + + xorq %rax,%rax + adoxq %rcx,%r10 + adoxq %rbx,%r11 + adoxq %r15,%r12 + adoxq %rdx,%r13 + adoxq %rax,%r14 + + + + xorq %r15,%r15 + addq 40(%rdi),%r8 + adcq 48(%rdi),%r9 + adcq 56(%rdi),%r10 + adcq 64(%rdi),%r11 + adcq 72(%rdi),%r12 + adcq 80(%rdi),%r13 + adcq 88(%rdi),%r14 + adcq 96(%rdi),%r15 + movq %r8,40(%rdi) + movq %r9,48(%rdi) + movq %r10,56(%rdi) + movq %r11,64(%rdi) + movq %r12,72(%rdi) + movq %r13,80(%rdi) + movq %r14,88(%rdi) + movq %r15,96(%rdi) + movq 104(%rdi),%r8 + movq 112(%rdi),%r9 + movq 120(%rdi),%r10 + adcq $0x0,%r8 + adcq $0x0,%r9 + adcq $0x0,%r10 + movq %r8,104(%rdi) + movq %r9,112(%rdi) + movq %r10,120(%rdi) + + movq 32+0(%rdi),%rdx + mulxq 0+L$p503p1_nz(%rip),%r8,%r9 + mulxq 8+L$p503p1_nz(%rip),%r12,%r10 + + xorq %rax,%rax + mulxq 16+L$p503p1_nz(%rip),%r13,%r11 + adoxq %r12,%r9 + adoxq %r13,%r10 + mulxq 24+L$p503p1_nz(%rip),%rbx,%r12 + adoxq %rbx,%r11 + mulxq 32+L$p503p1_nz(%rip),%r14,%r13 + adoxq %r14,%r12 + adoxq %rax,%r13 + + movq 32+8(%rdi),%rdx + mulxq 0+L$p503p1_nz(%rip),%r14,%rbx + adcxq %r14,%r9 + adcxq %rbx,%r10 + mulxq 8+L$p503p1_nz(%rip),%rcx,%r14 + adcxq %r14,%r11 + mulxq 16+L$p503p1_nz(%rip),%rbx,%r15 + adcxq %r15,%r12 + mulxq 24+L$p503p1_nz(%rip),%r15,%r14 + adcxq %r14,%r13 + mulxq 32+L$p503p1_nz(%rip),%rdx,%r14 + adcxq %rax,%r14 + + xorq %rax,%rax + adoxq %rcx,%r10 + adoxq %rbx,%r11 + adoxq %r15,%r12 + adoxq %rdx,%r13 + adoxq %rax,%r14 + + + + xorq %r15,%r15 + xorq %rbx,%rbx + addq 56(%rdi),%r8 + adcq 64(%rdi),%r9 + adcq 72(%rdi),%r10 + adcq 80(%rdi),%r11 + adcq 88(%rdi),%r12 + adcq 96(%rdi),%r13 + adcq 104(%rdi),%r14 + adcq 112(%rdi),%r15 + adcq 120(%rdi),%rbx + movq %r8,56(%rdi) + movq %r9,(%rsi) + movq %r10,72(%rdi) + movq %r11,80(%rdi) + movq %r12,88(%rdi) + movq %r13,96(%rdi) + movq %r14,104(%rdi) + movq %r15,112(%rdi) + movq %rbx,120(%rdi) + + movq 
48+0(%rdi),%rdx + mulxq 0+L$p503p1_nz(%rip),%r8,%r9 + mulxq 8+L$p503p1_nz(%rip),%r12,%r10 + + xorq %rax,%rax + mulxq 16+L$p503p1_nz(%rip),%r13,%r11 + adoxq %r12,%r9 + adoxq %r13,%r10 + mulxq 24+L$p503p1_nz(%rip),%rbx,%r12 + adoxq %rbx,%r11 + mulxq 32+L$p503p1_nz(%rip),%r14,%r13 + adoxq %r14,%r12 + adoxq %rax,%r13 + + movq 48+8(%rdi),%rdx + mulxq 0+L$p503p1_nz(%rip),%r14,%rbx + adcxq %r14,%r9 + adcxq %rbx,%r10 + mulxq 8+L$p503p1_nz(%rip),%rcx,%r14 + adcxq %r14,%r11 + mulxq 16+L$p503p1_nz(%rip),%rbx,%r15 + adcxq %r15,%r12 + mulxq 24+L$p503p1_nz(%rip),%r15,%r14 + adcxq %r14,%r13 + mulxq 32+L$p503p1_nz(%rip),%rdx,%r14 + adcxq %rax,%r14 + + xorq %rax,%rax + adoxq %rcx,%r10 + adoxq %rbx,%r11 + adoxq %r15,%r12 + adoxq %rdx,%r13 + adoxq %rax,%r14 + + + + addq 72(%rdi),%r8 + adcq 80(%rdi),%r9 + adcq 88(%rdi),%r10 + adcq 96(%rdi),%r11 + adcq 104(%rdi),%r12 + adcq 112(%rdi),%r13 + adcq 120(%rdi),%r14 + movq %r8,8(%rsi) + movq %r9,16(%rsi) + movq %r10,24(%rsi) + movq %r11,32(%rsi) + movq %r12,40(%rsi) + movq %r13,48(%rsi) + movq %r14,56(%rsi) + + popq %rbx + + + popq %r15 + + + popq %r14 + + + popq %r13 + + + popq %r12 + + + .byte 0xf3,0xc3 + +.globl _sike_fprdc +.private_extern _sike_fprdc + +_sike_fprdc: + + pushq %r12 + + + pushq %r13 + + + pushq %r14 + + + pushq %r15 + + + pushq %rbx + + + + leaq _OPENSSL_ia32cap_P(%rip),%rcx + movq 8(%rcx),%rcx + andl $0x80100,%ecx + cmpl $0x80100,%ecx + je L$rdc_mulx_asm + + + + + leaq L$p503p1(%rip),%rbx + + movq (%rdi),%r11 + movq (%rbx),%rax + mulq %r11 + xorq %r8,%r8 + addq 24(%rdi),%rax + movq %rax,24(%rsi) + adcq %rdx,%r8 + + xorq %r9,%r9 + movq 8(%rbx),%rax + mulq %r11 + xorq %r10,%r10 + addq %rax,%r8 + adcq %rdx,%r9 + + movq 8(%rdi),%r12 + movq (%rbx),%rax + mulq %r12 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + addq 32(%rdi),%r8 + movq %r8,32(%rsi) + adcq $0,%r9 + adcq $0,%r10 + + xorq %r8,%r8 + movq 16(%rbx),%rax + mulq %r11 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r8 + + movq 8(%rbx),%rax + mulq %r12 + addq %rax,%r9 + 
adcq %rdx,%r10 + adcq $0,%r8 + + movq 16(%rdi),%r13 + movq (%rbx),%rax + mulq %r13 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r8 + addq 40(%rdi),%r9 + movq %r9,40(%rsi) + adcq $0,%r10 + adcq $0,%r8 + + xorq %r9,%r9 + movq 24(%rbx),%rax + mulq %r11 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%r9 + + movq 16(%rbx),%rax + mulq %r12 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%r9 + + movq 8(%rbx),%rax + mulq %r13 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%r9 + + movq 24(%rsi),%r14 + movq (%rbx),%rax + mulq %r14 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%r9 + addq 48(%rdi),%r10 + movq %r10,48(%rsi) + adcq $0,%r8 + adcq $0,%r9 + + xorq %r10,%r10 + movq 32(%rbx),%rax + mulq %r11 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + + movq 24(%rbx),%rax + mulq %r12 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + + movq 16(%rbx),%rax + mulq %r13 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + + movq 8(%rbx),%rax + mulq %r14 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + + movq 32(%rsi),%r15 + movq (%rbx),%rax + mulq %r15 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + addq 56(%rdi),%r8 + movq %r8,56(%rsi) + adcq $0,%r9 + adcq $0,%r10 + + xorq %r8,%r8 + movq 32(%rbx),%rax + mulq %r12 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r8 + + movq 24(%rbx),%rax + mulq %r13 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r8 + + movq 16(%rbx),%rax + mulq %r14 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r8 + + movq 8(%rbx),%rax + mulq %r15 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r8 + + movq 40(%rsi),%rcx + movq (%rbx),%rax + mulq %rcx + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r8 + addq 64(%rdi),%r9 + movq %r9,(%rsi) + adcq $0,%r10 + adcq $0,%r8 + + xorq %r9,%r9 + movq 32(%rbx),%rax + mulq %r13 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%r9 + + movq 24(%rbx),%rax + mulq %r14 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%r9 + + movq 16(%rbx),%rax + mulq %r15 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%r9 + + movq 8(%rbx),%rax + mulq %rcx + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%r9 + + 
movq 48(%rsi),%r13 + movq (%rbx),%rax + mulq %r13 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%r9 + addq 72(%rdi),%r10 + movq %r10,8(%rsi) + adcq $0,%r8 + adcq $0,%r9 + + xorq %r10,%r10 + movq 32(%rbx),%rax + mulq %r14 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + + movq 24(%rbx),%rax + mulq %r15 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + + movq 16(%rbx),%rax + mulq %rcx + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + + movq 8(%rbx),%rax + mulq %r13 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + + movq 56(%rsi),%r14 + movq (%rbx),%rax + mulq %r14 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + addq 80(%rdi),%r8 + movq %r8,16(%rsi) + adcq $0,%r9 + adcq $0,%r10 + + xorq %r8,%r8 + movq 32(%rbx),%rax + mulq %r15 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r8 + + movq 24(%rbx),%rax + mulq %rcx + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r8 + + movq 16(%rbx),%rax + mulq %r13 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r8 + + movq 8(%rbx),%rax + mulq %r14 + addq %rax,%r9 + adcq %rdx,%r10 + adcq $0,%r8 + addq 88(%rdi),%r9 + movq %r9,24(%rsi) + adcq $0,%r10 + adcq $0,%r8 + + xorq %r9,%r9 + movq 32(%rbx),%rax + mulq %rcx + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%r9 + + movq 24(%rbx),%rax + mulq %r13 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%r9 + + movq 16(%rbx),%rax + mulq %r14 + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%r9 + addq 96(%rdi),%r10 + movq %r10,32(%rsi) + adcq $0,%r8 + adcq $0,%r9 + + xorq %r10,%r10 + movq 32(%rbx),%rax + mulq %r13 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + + movq 24(%rbx),%rax + mulq %r14 + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r10 + addq 104(%rdi),%r8 + movq %r8,40(%rsi) + adcq $0,%r9 + adcq $0,%r10 + + movq 32(%rbx),%rax + mulq %r14 + addq %rax,%r9 + adcq %rdx,%r10 + addq 112(%rdi),%r9 + movq %r9,48(%rsi) + adcq $0,%r10 + addq 120(%rdi),%r10 + movq %r10,56(%rsi) + + popq %rbx + + popq %r15 + + popq %r14 + + popq %r13 + + popq %r12 + + .byte 0xf3,0xc3 + +#endif +#endif // defined(__x86_64__) && defined(__APPLE__) diff 
--git a/Sources/CNIOBoringSSL/crypto/x509/x509_txt.c b/Sources/CNIOBoringSSL/crypto/x509/x509_txt.c index 507a2317..473681e3 100644 --- a/Sources/CNIOBoringSSL/crypto/x509/x509_txt.c +++ b/Sources/CNIOBoringSSL/crypto/x509/x509_txt.c @@ -54,13 +54,10 @@ * copied and put under another distribution licence * [including the GNU Public Licence.] */ -#include #include const char *X509_verify_cert_error_string(long n) { - static char buf[100]; - switch ((int)n) { case X509_V_OK: return ("ok"); @@ -198,8 +195,10 @@ const char *X509_verify_cert_error_string(long n) case X509_V_ERR_STORE_LOOKUP: return ("Issuer certificate lookup error"); + case X509_V_ERR_NAME_CONSTRAINTS_WITHOUT_SANS: + return "Issuer has name constraints but leaf has no SANs"; + default: - BIO_snprintf(buf, sizeof buf, "error number %ld", n); - return (buf); + return "unknown certificate verification error"; } } diff --git a/Sources/CNIOBoringSSL/crypto/x509/x509_vfy.c b/Sources/CNIOBoringSSL/crypto/x509/x509_vfy.c index 8c78a784..aea06902 100644 --- a/Sources/CNIOBoringSSL/crypto/x509/x509_vfy.c +++ b/Sources/CNIOBoringSSL/crypto/x509/x509_vfy.c @@ -70,6 +70,7 @@ #include "vpm_int.h" #include "../internal.h" +#include "../x509v3/internal.h" static CRYPTO_EX_DATA_CLASS g_ex_data_class = CRYPTO_EX_DATA_CLASS_INIT_WITH_APP_DATA; @@ -710,13 +711,40 @@ static int check_chain_extensions(X509_STORE_CTX *ctx) return ok; } +static int reject_dns_name_in_common_name(X509 *x509) +{ + X509_NAME *name = X509_get_subject_name(x509); + int i = -1; + for (;;) { + i = X509_NAME_get_index_by_NID(name, NID_commonName, i); + if (i == -1) { + return X509_V_OK; + } + + X509_NAME_ENTRY *entry = X509_NAME_get_entry(name, i); + ASN1_STRING *common_name = X509_NAME_ENTRY_get_data(entry); + unsigned char *idval; + int idlen = ASN1_STRING_to_UTF8(&idval, common_name); + if (idlen < 0) { + return X509_V_ERR_OUT_OF_MEM; + } + /* Only process attributes that look like host names. 
Note it is + * important that this check be mirrored in |X509_check_host|. */ + int looks_like_dns = x509v3_looks_like_dns_name(idval, (size_t)idlen); + OPENSSL_free(idval); + if (looks_like_dns) { + return X509_V_ERR_NAME_CONSTRAINTS_WITHOUT_SANS; + } + } +} + static int check_name_constraints(X509_STORE_CTX *ctx) { - X509 *x; int i, j, rv; + int has_name_constraints = 0; /* Check name constraints for all certificates */ for (i = sk_X509_num(ctx->chain) - 1; i >= 0; i--) { - x = sk_X509_value(ctx->chain, i); + X509 *x = sk_X509_value(ctx->chain, i); /* Ignore self issued certs unless last in chain */ if (i && (x->ex_flags & EXFLAG_SI)) continue; @@ -729,6 +757,7 @@ static int check_name_constraints(X509_STORE_CTX *ctx) for (j = sk_X509_num(ctx->chain) - 1; j > i; j--) { NAME_CONSTRAINTS *nc = sk_X509_value(ctx->chain, j)->nc; if (nc) { + has_name_constraints = 1; rv = NAME_CONSTRAINTS_check(x, nc); switch (rv) { case X509_V_OK: @@ -747,6 +776,36 @@ static int check_name_constraints(X509_STORE_CTX *ctx) } } } + + /* Name constraints do not match against the common name, but + * |X509_check_host| still implements the legacy behavior where, on + * certificates lacking a SAN list, DNS-like names in the common name are + * checked instead. + * + * While we could apply the name constraints to the common name, name + * constraints are rare enough that we can hold such certificates to a + * higher standard. Note this does not make "DNS-like" heuristic failures + * any worse. A decorative common-name misidentified as a DNS name would + * fail the name constraint anyway. 
*/ + X509 *leaf = sk_X509_value(ctx->chain, 0); + if (has_name_constraints && leaf->altname == NULL) { + rv = reject_dns_name_in_common_name(leaf); + switch (rv) { + case X509_V_OK: + break; + case X509_V_ERR_OUT_OF_MEM: + ctx->error = rv; + return 0; + default: + ctx->error = rv; + ctx->error_depth = i; + ctx->current_cert = leaf; + if (!ctx->verify_cb(0, ctx)) + return 0; + break; + } + } + return 1; } diff --git a/Sources/CNIOBoringSSL/crypto/x509v3/internal.h b/Sources/CNIOBoringSSL/crypto/x509v3/internal.h index 60225c55..43b37908 100644 --- a/Sources/CNIOBoringSSL/crypto/x509v3/internal.h +++ b/Sources/CNIOBoringSSL/crypto/x509v3/internal.h @@ -43,6 +43,11 @@ unsigned char *x509v3_hex_to_bytes(const char *str, long *len); // followed by '.'. Otherwise, it returns a non-zero number. int x509v3_name_cmp(const char *name, const char *cmp); +// x509v3_looks_like_dns_name returns one if |in| looks like a DNS name and zero +// otherwise. +OPENSSL_EXPORT int x509v3_looks_like_dns_name(const unsigned char *in, + size_t len); + #if defined(__cplusplus) } /* extern C */ diff --git a/Sources/CNIOBoringSSL/crypto/x509v3/v3_utl.c b/Sources/CNIOBoringSSL/crypto/x509v3/v3_utl.c index a67648a9..a6d9b5c2 100644 --- a/Sources/CNIOBoringSSL/crypto/x509v3/v3_utl.c +++ b/Sources/CNIOBoringSSL/crypto/x509v3/v3_utl.c @@ -909,6 +909,53 @@ static int equal_wildcard(const unsigned char *pattern, size_t pattern_len, subject, subject_len, flags); } +int x509v3_looks_like_dns_name(const unsigned char *in, size_t len) { + /* This function is used as a heuristic for whether a common name is a + * hostname to be matched, or merely a decorative name to describe the + * subject. This heuristic must be applied to both name constraints and the + * common name fallback, so it must be loose enough to accept hostname + * common names, and tight enough to reject decorative common names. */ + + if (len > 0 && in[len - 1] == '.') { + len--; + } + + /* Wildcards are allowed in front. 
*/ + if (len >= 2 && in[0] == '*' && in[1] == '.') { + in += 2; + len -= 2; + } + + if (len == 0) { + return 0; + } + + size_t label_start = 0; + for (size_t i = 0; i < len; i++) { + unsigned char c = in[i]; + if ((c >= 'a' && c <= 'z') || + (c >= '0' && c <= '9') || + (c >= 'A' && c <= 'Z') || + (c == '-' && i > label_start) || + /* These are not valid characters in hostnames, but commonly found + * in deployments outside the Web PKI. */ + c == '_' || + c == ':') { + continue; + } + + /* Labels must not be empty. */ + if (c == '.' && i > label_start && i < len - 1) { + label_start = i + 1; + continue; + } + + return 0; + } + + return 1; +} + /* * Compare an ASN1_STRING to a supplied string. If they match return 1. If * cmp_type > 0 only compare if string matches the type, otherwise convert it @@ -916,8 +963,8 @@ static int equal_wildcard(const unsigned char *pattern, size_t pattern_len, */ static int do_check_string(ASN1_STRING *a, int cmp_type, equal_fn equal, - unsigned int flags, const char *b, size_t blen, - char **peername) + unsigned int flags, int check_type, const char *b, + size_t blen, char **peername) { int rv = 0; @@ -938,7 +985,17 @@ static int do_check_string(ASN1_STRING *a, int cmp_type, equal_fn equal, astrlen = ASN1_STRING_to_UTF8(&astr, a); if (astrlen < 0) return -1; - rv = equal(astr, astrlen, (unsigned char *)b, blen, flags); + /* + * We check the common name against DNS name constraints if it passes + * |x509v3_looks_like_dns_name|. Thus we must not consider common names + * for DNS fallbacks if they fail this check. 
+ */ + if (check_type == GEN_DNS && + !x509v3_looks_like_dns_name(astr, astrlen)) { + rv = 0; + } else { + rv = equal(astr, astrlen, (unsigned char *)b, blen, flags); + } if (rv > 0 && peername) *peername = BUF_strndup((char *)astr, astrlen); OPENSSL_free(astr); @@ -955,7 +1012,6 @@ static int do_x509_check(X509 *x, const char *chk, size_t chklen, int j; int cnid = NID_undef; int alt_type; - int san_present = 0; int rv = 0; equal_fn equal; @@ -988,7 +1044,6 @@ static int do_x509_check(X509 *x, const char *chk, size_t chklen, gen = sk_GENERAL_NAME_value(gens, i); if (gen->type != check_type) continue; - san_present = 1; if (check_type == GEN_EMAIL) cstr = gen->d.rfc822Name; else if (check_type == GEN_DNS) @@ -996,21 +1051,16 @@ static int do_x509_check(X509 *x, const char *chk, size_t chklen, else cstr = gen->d.iPAddress; /* Positive on success, negative on error! */ - if ((rv = do_check_string(cstr, alt_type, equal, flags, + if ((rv = do_check_string(cstr, alt_type, equal, flags, check_type, chk, chklen, peername)) != 0) break; } GENERAL_NAMES_free(gens); - if (rv != 0) - return rv; - if (cnid == NID_undef - || (san_present - && !(flags & X509_CHECK_FLAG_ALWAYS_CHECK_SUBJECT))) - return 0; + return rv; } /* We're done if CN-ID is not pertinent */ - if (cnid == NID_undef) + if (cnid == NID_undef || (flags & X509_CHECK_FLAG_NEVER_CHECK_SUBJECT)) return 0; j = -1; @@ -1021,7 +1071,7 @@ static int do_x509_check(X509 *x, const char *chk, size_t chklen, ne = X509_NAME_get_entry(name, j); str = X509_NAME_ENTRY_get_data(ne); /* Positive on success, negative on error! 
*/ - if ((rv = do_check_string(str, -1, equal, flags, + if ((rv = do_check_string(str, -1, equal, flags, check_type, chk, chklen, peername)) != 0) return rv; } diff --git a/Sources/CNIOBoringSSL/hash.txt b/Sources/CNIOBoringSSL/hash.txt index 5ed4d03e..044bdf0c 100644 --- a/Sources/CNIOBoringSSL/hash.txt +++ b/Sources/CNIOBoringSSL/hash.txt @@ -1 +1 @@ -This directory is derived from BoringSSL cloned from https://boringssl.googlesource.com/boringssl at revision ad9eee1628aa4dac2ac3528cb6bb5ddf27e73560 +This directory is derived from BoringSSL cloned from https://boringssl.googlesource.com/boringssl at revision ff62b38b4b5a0e7926034b5f93d0c276e55b571d diff --git a/Sources/CNIOBoringSSL/include/CNIOBoringSSL/aes.h b/Sources/CNIOBoringSSL/include/CNIOBoringSSL/aes.h index 7f6ff107..55fddcc4 100644 --- a/Sources/CNIOBoringSSL/include/CNIOBoringSSL/aes.h +++ b/Sources/CNIOBoringSSL/include/CNIOBoringSSL/aes.h @@ -76,18 +76,18 @@ struct aes_key_st { typedef struct aes_key_st AES_KEY; // AES_set_encrypt_key configures |aeskey| to encrypt with the |bits|-bit key, -// |key|. +// |key|. |key| must point to |bits|/8 bytes. It returns zero on success and a +// negative number if |bits| is an invalid AES key size. // -// WARNING: unlike other OpenSSL functions, this returns zero on success and a -// negative number on error. +// WARNING: this function breaks the usual return value convention. OPENSSL_EXPORT int AES_set_encrypt_key(const uint8_t *key, unsigned bits, AES_KEY *aeskey); // AES_set_decrypt_key configures |aeskey| to decrypt with the |bits|-bit key, -// |key|. +// |key|. |key| must point to |bits|/8 bytes. It returns zero on success and a +// negative number if |bits| is an invalid AES key size. // -// WARNING: unlike other OpenSSL functions, this returns zero on success and a -// negative number on error. +// WARNING: this function breaks the usual return value convention. 
OPENSSL_EXPORT int AES_set_decrypt_key(const uint8_t *key, unsigned bits, AES_KEY *aeskey); diff --git a/Sources/CNIOBoringSSL/include/CNIOBoringSSL/base.h b/Sources/CNIOBoringSSL/include/CNIOBoringSSL/base.h index 623f02ee..00e9a877 100644 --- a/Sources/CNIOBoringSSL/include/CNIOBoringSSL/base.h +++ b/Sources/CNIOBoringSSL/include/CNIOBoringSSL/base.h @@ -293,6 +293,24 @@ extern "C" { #endif #endif // OPENSSL_ASM_INCOMPATIBLE +#if defined(__cplusplus) +// enums can be predeclared, but only in C++ and only if given an explicit type. +// C doesn't support setting an explicit type for enums thus a #define is used +// to do this only for C++. However, the ABI type between C and C++ need to have +// equal sizes, which is confirmed in a unittest. +#define BORINGSSL_ENUM_INT : int +enum ssl_early_data_reason_t BORINGSSL_ENUM_INT; +enum ssl_encryption_level_t BORINGSSL_ENUM_INT; +enum ssl_private_key_result_t BORINGSSL_ENUM_INT; +enum ssl_renegotiate_mode_t BORINGSSL_ENUM_INT; +enum ssl_select_cert_result_t BORINGSSL_ENUM_INT; +enum ssl_select_cert_result_t BORINGSSL_ENUM_INT; +enum ssl_ticket_aead_result_t BORINGSSL_ENUM_INT; +enum ssl_verify_result_t BORINGSSL_ENUM_INT; +#else +#define BORINGSSL_ENUM_INT +#endif + // CRYPTO_THREADID is a dummy value. typedef int CRYPTO_THREADID; diff --git a/Sources/CNIOBoringSSL/include/CNIOBoringSSL/digest.h b/Sources/CNIOBoringSSL/include/CNIOBoringSSL/digest.h index 726ec7d0..11328bc6 100644 --- a/Sources/CNIOBoringSSL/include/CNIOBoringSSL/digest.h +++ b/Sources/CNIOBoringSSL/include/CNIOBoringSSL/digest.h @@ -269,6 +269,11 @@ OPENSSL_EXPORT EVP_MD_CTX *EVP_MD_CTX_create(void); // EVP_MD_CTX_destroy calls |EVP_MD_CTX_free|. OPENSSL_EXPORT void EVP_MD_CTX_destroy(EVP_MD_CTX *ctx); +// EVP_DigestFinalXOF returns zero and adds an error to the error queue. +// BoringSSL does not support any XOF digests. 
+OPENSSL_EXPORT int EVP_DigestFinalXOF(EVP_MD_CTX *ctx, uint8_t *out, + size_t len); + struct evp_md_pctx_ops; diff --git a/Sources/CNIOBoringSSL/include/CNIOBoringSSL/dsa.h b/Sources/CNIOBoringSSL/include/CNIOBoringSSL/dsa.h index a9a5500d..c978ea3e 100644 --- a/Sources/CNIOBoringSSL/include/CNIOBoringSSL/dsa.h +++ b/Sources/CNIOBoringSSL/include/CNIOBoringSSL/dsa.h @@ -436,5 +436,6 @@ BSSL_NAMESPACE_END #define DSA_R_BAD_VERSION 104 #define DSA_R_DECODE_ERROR 105 #define DSA_R_ENCODE_ERROR 106 +#define DSA_R_INVALID_PARAMETERS 107 #endif // OPENSSL_HEADER_DSA_H diff --git a/Sources/CNIOBoringSSL/include/CNIOBoringSSL/engine.h b/Sources/CNIOBoringSSL/include/CNIOBoringSSL/engine.h index f4916fcb..a9244237 100644 --- a/Sources/CNIOBoringSSL/include/CNIOBoringSSL/engine.h +++ b/Sources/CNIOBoringSSL/include/CNIOBoringSSL/engine.h @@ -40,8 +40,8 @@ extern "C" { OPENSSL_EXPORT ENGINE *ENGINE_new(void); // ENGINE_free decrements the reference counts for all methods linked from -// |engine| and frees |engine| itself. -OPENSSL_EXPORT void ENGINE_free(ENGINE *engine); +// |engine| and frees |engine| itself. It returns one. +OPENSSL_EXPORT int ENGINE_free(ENGINE *engine); // Method accessors. diff --git a/Sources/CNIOBoringSSL/include/CNIOBoringSSL/evp.h b/Sources/CNIOBoringSSL/include/CNIOBoringSSL/evp.h index cbfd80ff..8223a581 100644 --- a/Sources/CNIOBoringSSL/include/CNIOBoringSSL/evp.h +++ b/Sources/CNIOBoringSSL/include/CNIOBoringSSL/evp.h @@ -170,16 +170,6 @@ OPENSSL_EXPORT int EVP_PKEY_assign_EC_KEY(EVP_PKEY *pkey, EC_KEY *key); OPENSSL_EXPORT EC_KEY *EVP_PKEY_get0_EC_KEY(const EVP_PKEY *pkey); OPENSSL_EXPORT EC_KEY *EVP_PKEY_get1_EC_KEY(const EVP_PKEY *pkey); -// EVP_PKEY_new_ed25519_public returns a newly allocated |EVP_PKEY| wrapping an -// Ed25519 public key, or NULL on allocation error. 
-OPENSSL_EXPORT EVP_PKEY *EVP_PKEY_new_ed25519_public( - const uint8_t public_key[32]); - -// EVP_PKEY_new_ed25519_private returns a newly allocated |EVP_PKEY| wrapping an -// Ed25519 private key, or NULL on allocation error. -OPENSSL_EXPORT EVP_PKEY *EVP_PKEY_new_ed25519_private( - const uint8_t private_key[64]); - #define EVP_PKEY_NONE NID_undef #define EVP_PKEY_RSA NID_rsaEncryption #define EVP_PKEY_RSA_PSS NID_rsassaPss @@ -241,6 +231,48 @@ OPENSSL_EXPORT EVP_PKEY *EVP_parse_private_key(CBS *cbs); OPENSSL_EXPORT int EVP_marshal_private_key(CBB *cbb, const EVP_PKEY *key); +// Raw keys +// +// Some key types support a "raw" serialization. Currently the only supported +// raw format is Ed25519, where the public key and private key formats are those +// specified in RFC 8032. Note the RFC 8032 private key format is the 32-byte +// prefix of |ED25519_sign|'s 64-byte private key. + +// EVP_PKEY_new_raw_private_key returns a newly allocated |EVP_PKEY| wrapping a +// private key of the specified type. It returns the key on success and NULL +// on error. +OPENSSL_EXPORT EVP_PKEY *EVP_PKEY_new_raw_private_key(int type, ENGINE *unused, + const uint8_t *in, + size_t len); + +// EVP_PKEY_new_raw_public_key returns a newly allocated |EVP_PKEY| wrapping a +// public key of the specified type. It returns the key on success and NULL +// on error. +OPENSSL_EXPORT EVP_PKEY *EVP_PKEY_new_raw_public_key(int type, ENGINE *unused, + const uint8_t *in, + size_t len); + +// EVP_PKEY_get_raw_private_key outputs the private key for |pkey| in raw form. +// If |out| is NULL, it sets |*out_len| to the size of the raw private key. +// Otherwise, it writes at most |*out_len| bytes to |out| and sets |*out_len| to +// the number of bytes written. +// +// It returns one on success and zero if |pkey| has no private key, the key +// type does not support a raw format, or the buffer is too small.
+OPENSSL_EXPORT int EVP_PKEY_get_raw_private_key(const EVP_PKEY *pkey, + uint8_t *out, size_t *out_len); + +// EVP_PKEY_get_raw_public_key outputs the public key for |pkey| in raw form. +// If |out| is NULL, it sets |*out_len| to the size of the raw public key. +// Otherwise, it writes at most |*out_len| bytes to |out| and sets |*out_len| to +// the number of bytes written. +// +// It returns one on success and zero if |pkey| has no public key, the key +// type does not support a raw format, or the buffer is too small. +OPENSSL_EXPORT int EVP_PKEY_get_raw_public_key(const EVP_PKEY *pkey, + uint8_t *out, size_t *out_len); + + // Signing // EVP_DigestSignInit sets up |ctx| for a signing operation with |type| and diff --git a/Sources/CNIOBoringSSL/include/CNIOBoringSSL/ssl.h b/Sources/CNIOBoringSSL/include/CNIOBoringSSL/ssl.h index fbaccaf0..8f4e06d3 100644 --- a/Sources/CNIOBoringSSL/include/CNIOBoringSSL/ssl.h +++ b/Sources/CNIOBoringSSL/include/CNIOBoringSSL/ssl.h @@ -1160,7 +1160,7 @@ OPENSSL_EXPORT void *SSL_CTX_get_default_passwd_cb_userdata(const SSL_CTX *ctx); // Custom private keys. -enum ssl_private_key_result_t { +enum ssl_private_key_result_t BORINGSSL_ENUM_INT { ssl_private_key_success, ssl_private_key_retry, ssl_private_key_failure, @@ -2111,7 +2111,7 @@ OPENSSL_EXPORT int SSL_CTX_set_tlsext_ticket_key_cb( // ssl_ticket_aead_result_t enumerates the possible results from decrypting a // ticket with an |SSL_TICKET_AEAD_METHOD|. -enum ssl_ticket_aead_result_t { +enum ssl_ticket_aead_result_t BORINGSSL_ENUM_INT { // ssl_ticket_aead_success indicates that the ticket was successfully // decrypted. 
ssl_ticket_aead_success, @@ -2285,7 +2285,7 @@ OPENSSL_EXPORT void SSL_set_verify(SSL *ssl, int mode, int (*callback)(int ok, X509_STORE_CTX *store_ctx)); -enum ssl_verify_result_t { +enum ssl_verify_result_t BORINGSSL_ENUM_INT { ssl_verify_ok, ssl_verify_invalid, ssl_verify_retry, @@ -3135,7 +3135,7 @@ OPENSSL_EXPORT int SSL_set1_delegated_credential( // ssl_encryption_level_t represents a specific QUIC encryption level used to // transmit handshake messages. -enum ssl_encryption_level_t { +enum ssl_encryption_level_t BORINGSSL_ENUM_INT { ssl_encryption_initial = 0, ssl_encryption_early_data, ssl_encryption_handshake, @@ -3311,6 +3311,46 @@ OPENSSL_EXPORT int SSL_export_early_keying_material( SSL *ssl, uint8_t *out, size_t out_len, const char *label, size_t label_len, const uint8_t *context, size_t context_len); +// SSL_get_ticket_age_skew returns the difference, in seconds, between the +// client-sent ticket age and the server-computed value in TLS 1.3 server +// connections which resumed a session. +OPENSSL_EXPORT int32_t SSL_get_ticket_age_skew(const SSL *ssl); + +enum ssl_early_data_reason_t BORINGSSL_ENUM_INT { + // The handshake has not progressed far enough for the 0-RTT status to be + // known. + ssl_early_data_unknown, + // 0-RTT is disabled for this connection. + ssl_early_data_disabled, + // 0-RTT was accepted. + ssl_early_data_accepted, + // The negotiated protocol version does not support 0-RTT. + ssl_early_data_protocol_version, + // The peer declined to offer or accept 0-RTT for an unknown reason. + ssl_early_data_peer_declined, + // The client did not offer a session. + ssl_early_data_no_session_offered, + // The server declined to resume the session. + ssl_early_data_session_not_resumed, + // The session does not support 0-RTT. + ssl_early_data_unsupported_for_session, + // The server sent a HelloRetryRequest. + ssl_early_data_hello_retry_request, + // The negotiated ALPN protocol did not match the session. 
+ ssl_early_data_alpn_mismatch, + // The connection negotiated Channel ID, which is incompatible with 0-RTT. + ssl_early_data_channel_id, + // The connection negotiated token binding, which is incompatible with 0-RTT. + ssl_early_data_token_binding, + // The client and server ticket age were too far apart. + ssl_early_data_ticket_age_skew, +}; + +// SSL_get_early_data_reason returns details why 0-RTT was accepted or rejected +// on |ssl|. This is primarily useful on the server. +OPENSSL_EXPORT enum ssl_early_data_reason_t SSL_get_early_data_reason( + const SSL *ssl); + // Alerts. // @@ -3522,7 +3562,7 @@ OPENSSL_EXPORT void SSL_CTX_set_current_time_cb( // such as HTTP/1.1, and not others, such as HTTP/2. OPENSSL_EXPORT void SSL_set_shed_handshake_config(SSL *ssl, int enable); -enum ssl_renegotiate_mode_t { +enum ssl_renegotiate_mode_t BORINGSSL_ENUM_INT { ssl_renegotiate_never = 0, ssl_renegotiate_once, ssl_renegotiate_freely, @@ -3620,7 +3660,7 @@ typedef struct ssl_early_callback_ctx { // ssl_select_cert_result_t enumerates the possible results from selecting a // certificate with |select_certificate_cb|. -enum ssl_select_cert_result_t { +enum ssl_select_cert_result_t BORINGSSL_ENUM_INT { // ssl_select_cert_success indicates that the certificate selection was // successful. ssl_select_cert_success = 1, @@ -3815,11 +3855,6 @@ OPENSSL_EXPORT void SSL_CTX_set_grease_enabled(SSL_CTX *ctx, int enabled); // record with |ssl|. OPENSSL_EXPORT size_t SSL_max_seal_overhead(const SSL *ssl); -// SSL_get_ticket_age_skew returns the difference, in seconds, between the -// client-sent ticket age and the server-computed value in TLS 1.3 server -// connections which resumed a session. -OPENSSL_EXPORT int32_t SSL_get_ticket_age_skew(const SSL *ssl); - // SSL_CTX_set_false_start_allowed_without_alpn configures whether connections // on |ctx| may use False Start (if |SSL_MODE_ENABLE_FALSE_START| is enabled) // without negotiating ALPN. 
@@ -4757,7 +4792,8 @@ OPENSSL_EXPORT bool SealRecord(SSL *ssl, Span out_prefix, OPENSSL_EXPORT void SSL_CTX_set_handoff_mode(SSL_CTX *ctx, bool on); OPENSSL_EXPORT void SSL_set_handoff_mode(SSL *SSL, bool on); -OPENSSL_EXPORT bool SSL_serialize_handoff(const SSL *ssl, CBB *out); +OPENSSL_EXPORT bool SSL_serialize_handoff(const SSL *ssl, CBB *out, + SSL_CLIENT_HELLO *out_hello); OPENSSL_EXPORT bool SSL_decline_handoff(SSL *ssl); OPENSSL_EXPORT bool SSL_apply_handoff(SSL *ssl, Span handoff); OPENSSL_EXPORT bool SSL_serialize_handback(const SSL *ssl, CBB *out); diff --git a/Sources/CNIOBoringSSL/include/CNIOBoringSSL/x509_vfy.h b/Sources/CNIOBoringSSL/include/CNIOBoringSSL/x509_vfy.h index 2dc2dd21..1120802e 100644 --- a/Sources/CNIOBoringSSL/include/CNIOBoringSSL/x509_vfy.h +++ b/Sources/CNIOBoringSSL/include/CNIOBoringSSL/x509_vfy.h @@ -370,6 +370,8 @@ OPENSSL_EXPORT void X509_STORE_CTX_set_depth(X509_STORE_CTX *ctx, int depth); /* Issuer lookup error */ #define X509_V_ERR_STORE_LOOKUP 66 +#define X509_V_ERR_NAME_CONSTRAINTS_WITHOUT_SANS 67 + /* Certificate verify flags */ /* Send issuer+subject checks to verify_cb */ diff --git a/Sources/CNIOBoringSSL/include/CNIOBoringSSL/x509v3.h b/Sources/CNIOBoringSSL/include/CNIOBoringSSL/x509v3.h index 2d19825b..96b7594b 100644 --- a/Sources/CNIOBoringSSL/include/CNIOBoringSSL/x509v3.h +++ b/Sources/CNIOBoringSSL/include/CNIOBoringSSL/x509v3.h @@ -703,8 +703,8 @@ OPENSSL_EXPORT void X509_email_free(STACK_OF(OPENSSL_STRING) *sk); OPENSSL_EXPORT STACK_OF(OPENSSL_STRING) *X509_get1_ocsp(X509 *x); /* Flags for X509_check_* functions */ -/* Always check subject name for host match even if subject alt names present */ -#define X509_CHECK_FLAG_ALWAYS_CHECK_SUBJECT 0x1 +/* Deprecated: this flag does nothing */ +#define X509_CHECK_FLAG_ALWAYS_CHECK_SUBJECT 0 /* Disable wildcard matching for dnsName fields and common name. */ #define X509_CHECK_FLAG_NO_WILDCARDS 0x2 /* Wildcards must not match a partial label. 
*/ @@ -713,6 +713,8 @@ OPENSSL_EXPORT STACK_OF(OPENSSL_STRING) *X509_get1_ocsp(X509 *x); #define X509_CHECK_FLAG_MULTI_LABEL_WILDCARDS 0x8 /* Constraint verifier subdomain patterns to match a single labels. */ #define X509_CHECK_FLAG_SINGLE_LABEL_SUBDOMAINS 0x10 +/* Skip the subject common name fallback if subjectAltNames is missing. */ +#define X509_CHECK_FLAG_NEVER_CHECK_SUBJECT 0x20 /* * Match reference identifiers starting with "." to any sub-domain. * This is a non-public flag, turned on implicitly when the subject diff --git a/Sources/CNIOBoringSSL/include/boringssl_prefix_symbols.h b/Sources/CNIOBoringSSL/include/boringssl_prefix_symbols.h index fd6251d6..600c9856 100644 --- a/Sources/CNIOBoringSSL/include/boringssl_prefix_symbols.h +++ b/Sources/CNIOBoringSSL/include/boringssl_prefix_symbols.h @@ -908,6 +908,7 @@ #define EVP_DecryptUpdate BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_DecryptUpdate) #define EVP_Digest BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_Digest) #define EVP_DigestFinal BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_DigestFinal) +#define EVP_DigestFinalXOF BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_DigestFinalXOF) #define EVP_DigestFinal_ex BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_DigestFinal_ex) #define EVP_DigestInit BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_DigestInit) #define EVP_DigestInit_ex BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_DigestInit_ex) @@ -995,14 +996,16 @@ #define EVP_PKEY_get1_DSA BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_PKEY_get1_DSA) #define EVP_PKEY_get1_EC_KEY BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_PKEY_get1_EC_KEY) #define EVP_PKEY_get1_RSA BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_PKEY_get1_RSA) +#define EVP_PKEY_get_raw_private_key BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_PKEY_get_raw_private_key) +#define EVP_PKEY_get_raw_public_key BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_PKEY_get_raw_public_key) #define EVP_PKEY_id BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_PKEY_id) #define EVP_PKEY_is_opaque 
BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_PKEY_is_opaque) #define EVP_PKEY_keygen BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_PKEY_keygen) #define EVP_PKEY_keygen_init BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_PKEY_keygen_init) #define EVP_PKEY_missing_parameters BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_PKEY_missing_parameters) #define EVP_PKEY_new BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_PKEY_new) -#define EVP_PKEY_new_ed25519_private BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_PKEY_new_ed25519_private) -#define EVP_PKEY_new_ed25519_public BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_PKEY_new_ed25519_public) +#define EVP_PKEY_new_raw_private_key BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_PKEY_new_raw_private_key) +#define EVP_PKEY_new_raw_public_key BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_PKEY_new_raw_public_key) #define EVP_PKEY_paramgen BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_PKEY_paramgen) #define EVP_PKEY_paramgen_init BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_PKEY_paramgen_init) #define EVP_PKEY_print_params BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, EVP_PKEY_print_params) @@ -1502,6 +1505,9 @@ #define SHA512_Init BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, SHA512_Init) #define SHA512_Transform BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, SHA512_Transform) #define SHA512_Update BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, SHA512_Update) +#define SIKE_decaps BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, SIKE_decaps) +#define SIKE_encaps BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, SIKE_encaps) +#define SIKE_keypair BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, SIKE_keypair) #define SPAKE2_CTX_free BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, SPAKE2_CTX_free) #define SPAKE2_CTX_new BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, SPAKE2_CTX_new) #define SPAKE2_generate_msg BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, SPAKE2_generate_msg) @@ -1784,6 +1790,7 @@ #define SSL_get_curve_id BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, SSL_get_curve_id) #define SSL_get_curve_name BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, 
SSL_get_curve_name) #define SSL_get_default_timeout BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, SSL_get_default_timeout) +#define SSL_get_early_data_reason BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, SSL_get_early_data_reason) #define SSL_get_error BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, SSL_get_error) #define SSL_get_ex_data BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, SSL_get_ex_data) #define SSL_get_ex_data_X509_STORE_CTX_idx BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, SSL_get_ex_data_X509_STORE_CTX_idx) @@ -2852,6 +2859,8 @@ #define ecp_nistz256_sqr_mont BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, ecp_nistz256_sqr_mont) #define ed25519_asn1_meth BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, ed25519_asn1_meth) #define ed25519_pkey_meth BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, ed25519_pkey_meth) +#define eval_3_isog BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, eval_3_isog) +#define eval_4_isog BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, eval_4_isog) #define gcm_ghash_4bit BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, gcm_ghash_4bit) #define gcm_ghash_avx BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, gcm_ghash_avx) #define gcm_ghash_clmul BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, gcm_ghash_clmul) @@ -2864,6 +2873,9 @@ #define gcm_init_avx BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, gcm_init_avx) #define gcm_init_clmul BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, gcm_init_clmul) #define gcm_init_ssse3 BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, gcm_init_ssse3) +#define get_3_isog BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, get_3_isog) +#define get_4_isog BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, get_4_isog) +#define get_A BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, get_A) #define i2a_ACCESS_DESCRIPTION BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, i2a_ACCESS_DESCRIPTION) #define i2a_ASN1_ENUMERATED BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, i2a_ASN1_ENUMERATED) #define i2a_ASN1_INTEGER BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, i2a_ASN1_INTEGER) @@ -3011,6 +3023,8 @@ #define i2v_ASN1_BIT_STRING BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, i2v_ASN1_BIT_STRING) #define 
i2v_GENERAL_NAME BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, i2v_GENERAL_NAME) #define i2v_GENERAL_NAMES BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, i2v_GENERAL_NAMES) +#define inv_3_way BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, inv_3_way) +#define j_inv BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, j_inv) #define kBoringSSLRSASqrtTwo BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, kBoringSSLRSASqrtTwo) #define kBoringSSLRSASqrtTwoLen BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, kBoringSSLRSASqrtTwoLen) #define kOpenSSLReasonStringData BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, kOpenSSLReasonStringData) @@ -3030,6 +3044,7 @@ #define md4_block_data_order BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, md4_block_data_order) #define md5_block_asm_data_order BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, md5_block_asm_data_order) #define o2i_ECPublicKey BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, o2i_ECPublicKey) +#define p503 BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, p503) #define pkcs12_iterations_acceptable BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, pkcs12_iterations_acceptable) #define pkcs12_key_gen BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, pkcs12_key_gen) #define pkcs12_pbe_encrypt_init BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, pkcs12_pbe_encrypt_init) @@ -3063,6 +3078,23 @@ #define sha1_block_data_order BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sha1_block_data_order) #define sha256_block_data_order BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sha256_block_data_order) #define sha512_block_data_order BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sha512_block_data_order) +#define sike_cswap_asm BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sike_cswap_asm) +#define sike_fp2inv_mont BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sike_fp2inv_mont) +#define sike_fp2mul_mont BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sike_fp2mul_mont) +#define sike_fp2sqr_mont BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sike_fp2sqr_mont) +#define sike_fpadd BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sike_fpadd) +#define sike_fpcopy BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sike_fpcopy) +#define 
sike_fpcorrection BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sike_fpcorrection) +#define sike_fpdiv2 BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sike_fpdiv2) +#define sike_fpmul_mont BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sike_fpmul_mont) +#define sike_fpneg BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sike_fpneg) +#define sike_fprdc BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sike_fprdc) +#define sike_fpsub BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sike_fpsub) +#define sike_from_mont BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sike_from_mont) +#define sike_mpadd_asm BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sike_mpadd_asm) +#define sike_mpdblsubx2_asm BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sike_mpdblsubx2_asm) +#define sike_mpmul BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sike_mpmul) +#define sike_mpsubx2_asm BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sike_mpsubx2_asm) #define sk_CRYPTO_BUFFER_call_copy_func BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sk_CRYPTO_BUFFER_call_copy_func) #define sk_CRYPTO_BUFFER_call_free_func BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sk_CRYPTO_BUFFER_call_free_func) #define sk_CRYPTO_BUFFER_deep_copy BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sk_CRYPTO_BUFFER_deep_copy) @@ -3177,7 +3209,12 @@ #define x509_rsa_pss_to_ctx BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, x509_rsa_pss_to_ctx) #define x509v3_bytes_to_hex BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, x509v3_bytes_to_hex) #define x509v3_hex_to_bytes BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, x509v3_hex_to_bytes) +#define x509v3_looks_like_dns_name BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, x509v3_looks_like_dns_name) #define x509v3_name_cmp BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, x509v3_name_cmp) +#define xDBLADD BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, xDBLADD) +#define xDBLe BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, xDBLe) +#define xTPL BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, xTPL) +#define xTPLe BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, xTPLe) #define sk_X509_ALGOR_call_free_func BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sk_X509_ALGOR_call_free_func) #define 
sk_X509_ALGOR_call_copy_func BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sk_X509_ALGOR_call_copy_func) #define sk_X509_ALGOR_call_cmp_func BORINGSSL_ADD_PREFIX(BORINGSSL_PREFIX, sk_X509_ALGOR_call_cmp_func) diff --git a/Sources/CNIOBoringSSL/include/boringssl_prefix_symbols_asm.h b/Sources/CNIOBoringSSL/include/boringssl_prefix_symbols_asm.h index 5485d877..4559a5ee 100644 --- a/Sources/CNIOBoringSSL/include/boringssl_prefix_symbols_asm.h +++ b/Sources/CNIOBoringSSL/include/boringssl_prefix_symbols_asm.h @@ -913,6 +913,7 @@ #define _EVP_DecryptUpdate BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_DecryptUpdate) #define _EVP_Digest BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_Digest) #define _EVP_DigestFinal BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_DigestFinal) +#define _EVP_DigestFinalXOF BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_DigestFinalXOF) #define _EVP_DigestFinal_ex BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_DigestFinal_ex) #define _EVP_DigestInit BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_DigestInit) #define _EVP_DigestInit_ex BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_DigestInit_ex) @@ -1000,14 +1001,16 @@ #define _EVP_PKEY_get1_DSA BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_PKEY_get1_DSA) #define _EVP_PKEY_get1_EC_KEY BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_PKEY_get1_EC_KEY) #define _EVP_PKEY_get1_RSA BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_PKEY_get1_RSA) +#define _EVP_PKEY_get_raw_private_key BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_PKEY_get_raw_private_key) +#define _EVP_PKEY_get_raw_public_key BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_PKEY_get_raw_public_key) #define _EVP_PKEY_id BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_PKEY_id) #define _EVP_PKEY_is_opaque BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_PKEY_is_opaque) #define _EVP_PKEY_keygen BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_PKEY_keygen) #define _EVP_PKEY_keygen_init 
BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_PKEY_keygen_init) #define _EVP_PKEY_missing_parameters BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_PKEY_missing_parameters) #define _EVP_PKEY_new BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_PKEY_new) -#define _EVP_PKEY_new_ed25519_private BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_PKEY_new_ed25519_private) -#define _EVP_PKEY_new_ed25519_public BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_PKEY_new_ed25519_public) +#define _EVP_PKEY_new_raw_private_key BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_PKEY_new_raw_private_key) +#define _EVP_PKEY_new_raw_public_key BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_PKEY_new_raw_public_key) #define _EVP_PKEY_paramgen BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_PKEY_paramgen) #define _EVP_PKEY_paramgen_init BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_PKEY_paramgen_init) #define _EVP_PKEY_print_params BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, EVP_PKEY_print_params) @@ -1507,6 +1510,9 @@ #define _SHA512_Init BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, SHA512_Init) #define _SHA512_Transform BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, SHA512_Transform) #define _SHA512_Update BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, SHA512_Update) +#define _SIKE_decaps BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, SIKE_decaps) +#define _SIKE_encaps BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, SIKE_encaps) +#define _SIKE_keypair BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, SIKE_keypair) #define _SPAKE2_CTX_free BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, SPAKE2_CTX_free) #define _SPAKE2_CTX_new BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, SPAKE2_CTX_new) #define _SPAKE2_generate_msg BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, SPAKE2_generate_msg) @@ -1789,6 +1795,7 @@ #define _SSL_get_curve_id BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, SSL_get_curve_id) #define _SSL_get_curve_name 
BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, SSL_get_curve_name) #define _SSL_get_default_timeout BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, SSL_get_default_timeout) +#define _SSL_get_early_data_reason BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, SSL_get_early_data_reason) #define _SSL_get_error BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, SSL_get_error) #define _SSL_get_ex_data BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, SSL_get_ex_data) #define _SSL_get_ex_data_X509_STORE_CTX_idx BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, SSL_get_ex_data_X509_STORE_CTX_idx) @@ -2857,6 +2864,8 @@ #define _ecp_nistz256_sqr_mont BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, ecp_nistz256_sqr_mont) #define _ed25519_asn1_meth BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, ed25519_asn1_meth) #define _ed25519_pkey_meth BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, ed25519_pkey_meth) +#define _eval_3_isog BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, eval_3_isog) +#define _eval_4_isog BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, eval_4_isog) #define _gcm_ghash_4bit BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, gcm_ghash_4bit) #define _gcm_ghash_avx BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, gcm_ghash_avx) #define _gcm_ghash_clmul BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, gcm_ghash_clmul) @@ -2869,6 +2878,9 @@ #define _gcm_init_avx BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, gcm_init_avx) #define _gcm_init_clmul BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, gcm_init_clmul) #define _gcm_init_ssse3 BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, gcm_init_ssse3) +#define _get_3_isog BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, get_3_isog) +#define _get_4_isog BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, get_4_isog) +#define _get_A BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, get_A) #define _i2a_ACCESS_DESCRIPTION BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, i2a_ACCESS_DESCRIPTION) #define _i2a_ASN1_ENUMERATED 
BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, i2a_ASN1_ENUMERATED) #define _i2a_ASN1_INTEGER BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, i2a_ASN1_INTEGER) @@ -3016,6 +3028,8 @@ #define _i2v_ASN1_BIT_STRING BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, i2v_ASN1_BIT_STRING) #define _i2v_GENERAL_NAME BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, i2v_GENERAL_NAME) #define _i2v_GENERAL_NAMES BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, i2v_GENERAL_NAMES) +#define _inv_3_way BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, inv_3_way) +#define _j_inv BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, j_inv) #define _kBoringSSLRSASqrtTwo BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, kBoringSSLRSASqrtTwo) #define _kBoringSSLRSASqrtTwoLen BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, kBoringSSLRSASqrtTwoLen) #define _kOpenSSLReasonStringData BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, kOpenSSLReasonStringData) @@ -3035,6 +3049,7 @@ #define _md4_block_data_order BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, md4_block_data_order) #define _md5_block_asm_data_order BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, md5_block_asm_data_order) #define _o2i_ECPublicKey BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, o2i_ECPublicKey) +#define _p503 BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, p503) #define _pkcs12_iterations_acceptable BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, pkcs12_iterations_acceptable) #define _pkcs12_key_gen BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, pkcs12_key_gen) #define _pkcs12_pbe_encrypt_init BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, pkcs12_pbe_encrypt_init) @@ -3068,6 +3083,23 @@ #define _sha1_block_data_order BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, sha1_block_data_order) #define _sha256_block_data_order BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, sha256_block_data_order) #define _sha512_block_data_order BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, sha512_block_data_order) +#define _sike_cswap_asm 
BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, sike_cswap_asm) +#define _sike_fp2inv_mont BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, sike_fp2inv_mont) +#define _sike_fp2mul_mont BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, sike_fp2mul_mont) +#define _sike_fp2sqr_mont BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, sike_fp2sqr_mont) +#define _sike_fpadd BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, sike_fpadd) +#define _sike_fpcopy BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, sike_fpcopy) +#define _sike_fpcorrection BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, sike_fpcorrection) +#define _sike_fpdiv2 BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, sike_fpdiv2) +#define _sike_fpmul_mont BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, sike_fpmul_mont) +#define _sike_fpneg BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, sike_fpneg) +#define _sike_fprdc BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, sike_fprdc) +#define _sike_fpsub BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, sike_fpsub) +#define _sike_from_mont BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, sike_from_mont) +#define _sike_mpadd_asm BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, sike_mpadd_asm) +#define _sike_mpdblsubx2_asm BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, sike_mpdblsubx2_asm) +#define _sike_mpmul BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, sike_mpmul) +#define _sike_mpsubx2_asm BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, sike_mpsubx2_asm) #define _sk_CRYPTO_BUFFER_call_copy_func BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, sk_CRYPTO_BUFFER_call_copy_func) #define _sk_CRYPTO_BUFFER_call_free_func BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, sk_CRYPTO_BUFFER_call_free_func) #define _sk_CRYPTO_BUFFER_deep_copy BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, sk_CRYPTO_BUFFER_deep_copy) @@ -3182,5 +3214,10 @@ #define _x509_rsa_pss_to_ctx BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, x509_rsa_pss_to_ctx) #define _x509v3_bytes_to_hex 
BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, x509v3_bytes_to_hex) #define _x509v3_hex_to_bytes BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, x509v3_hex_to_bytes) +#define _x509v3_looks_like_dns_name BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, x509v3_looks_like_dns_name) #define _x509v3_name_cmp BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, x509v3_name_cmp) +#define _xDBLADD BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, xDBLADD) +#define _xDBLe BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, xDBLe) +#define _xTPL BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, xTPL) +#define _xTPLe BORINGSSL_ADD_PREFIX_MAC_ASM(BORINGSSL_PREFIX, xTPLe) #endif diff --git a/Sources/CNIOBoringSSL/include/boringssl_prefix_symbols_nasm.inc b/Sources/CNIOBoringSSL/include/boringssl_prefix_symbols_nasm.inc index 678fa293..86837d90 100644 --- a/Sources/CNIOBoringSSL/include/boringssl_prefix_symbols_nasm.inc +++ b/Sources/CNIOBoringSSL/include/boringssl_prefix_symbols_nasm.inc @@ -905,6 +905,7 @@ %xdefine _EVP_DecryptUpdate _ %+ BORINGSSL_PREFIX %+ _EVP_DecryptUpdate %xdefine _EVP_Digest _ %+ BORINGSSL_PREFIX %+ _EVP_Digest %xdefine _EVP_DigestFinal _ %+ BORINGSSL_PREFIX %+ _EVP_DigestFinal +%xdefine _EVP_DigestFinalXOF _ %+ BORINGSSL_PREFIX %+ _EVP_DigestFinalXOF %xdefine _EVP_DigestFinal_ex _ %+ BORINGSSL_PREFIX %+ _EVP_DigestFinal_ex %xdefine _EVP_DigestInit _ %+ BORINGSSL_PREFIX %+ _EVP_DigestInit %xdefine _EVP_DigestInit_ex _ %+ BORINGSSL_PREFIX %+ _EVP_DigestInit_ex @@ -992,14 +993,16 @@ %xdefine _EVP_PKEY_get1_DSA _ %+ BORINGSSL_PREFIX %+ _EVP_PKEY_get1_DSA %xdefine _EVP_PKEY_get1_EC_KEY _ %+ BORINGSSL_PREFIX %+ _EVP_PKEY_get1_EC_KEY %xdefine _EVP_PKEY_get1_RSA _ %+ BORINGSSL_PREFIX %+ _EVP_PKEY_get1_RSA +%xdefine _EVP_PKEY_get_raw_private_key _ %+ BORINGSSL_PREFIX %+ _EVP_PKEY_get_raw_private_key +%xdefine _EVP_PKEY_get_raw_public_key _ %+ BORINGSSL_PREFIX %+ _EVP_PKEY_get_raw_public_key %xdefine _EVP_PKEY_id _ %+ BORINGSSL_PREFIX %+ _EVP_PKEY_id %xdefine _EVP_PKEY_is_opaque _ 
%+ BORINGSSL_PREFIX %+ _EVP_PKEY_is_opaque %xdefine _EVP_PKEY_keygen _ %+ BORINGSSL_PREFIX %+ _EVP_PKEY_keygen %xdefine _EVP_PKEY_keygen_init _ %+ BORINGSSL_PREFIX %+ _EVP_PKEY_keygen_init %xdefine _EVP_PKEY_missing_parameters _ %+ BORINGSSL_PREFIX %+ _EVP_PKEY_missing_parameters %xdefine _EVP_PKEY_new _ %+ BORINGSSL_PREFIX %+ _EVP_PKEY_new -%xdefine _EVP_PKEY_new_ed25519_private _ %+ BORINGSSL_PREFIX %+ _EVP_PKEY_new_ed25519_private -%xdefine _EVP_PKEY_new_ed25519_public _ %+ BORINGSSL_PREFIX %+ _EVP_PKEY_new_ed25519_public +%xdefine _EVP_PKEY_new_raw_private_key _ %+ BORINGSSL_PREFIX %+ _EVP_PKEY_new_raw_private_key +%xdefine _EVP_PKEY_new_raw_public_key _ %+ BORINGSSL_PREFIX %+ _EVP_PKEY_new_raw_public_key %xdefine _EVP_PKEY_paramgen _ %+ BORINGSSL_PREFIX %+ _EVP_PKEY_paramgen %xdefine _EVP_PKEY_paramgen_init _ %+ BORINGSSL_PREFIX %+ _EVP_PKEY_paramgen_init %xdefine _EVP_PKEY_print_params _ %+ BORINGSSL_PREFIX %+ _EVP_PKEY_print_params @@ -1499,6 +1502,9 @@ %xdefine _SHA512_Init _ %+ BORINGSSL_PREFIX %+ _SHA512_Init %xdefine _SHA512_Transform _ %+ BORINGSSL_PREFIX %+ _SHA512_Transform %xdefine _SHA512_Update _ %+ BORINGSSL_PREFIX %+ _SHA512_Update +%xdefine _SIKE_decaps _ %+ BORINGSSL_PREFIX %+ _SIKE_decaps +%xdefine _SIKE_encaps _ %+ BORINGSSL_PREFIX %+ _SIKE_encaps +%xdefine _SIKE_keypair _ %+ BORINGSSL_PREFIX %+ _SIKE_keypair %xdefine _SPAKE2_CTX_free _ %+ BORINGSSL_PREFIX %+ _SPAKE2_CTX_free %xdefine _SPAKE2_CTX_new _ %+ BORINGSSL_PREFIX %+ _SPAKE2_CTX_new %xdefine _SPAKE2_generate_msg _ %+ BORINGSSL_PREFIX %+ _SPAKE2_generate_msg @@ -1781,6 +1787,7 @@ %xdefine _SSL_get_curve_id _ %+ BORINGSSL_PREFIX %+ _SSL_get_curve_id %xdefine _SSL_get_curve_name _ %+ BORINGSSL_PREFIX %+ _SSL_get_curve_name %xdefine _SSL_get_default_timeout _ %+ BORINGSSL_PREFIX %+ _SSL_get_default_timeout +%xdefine _SSL_get_early_data_reason _ %+ BORINGSSL_PREFIX %+ _SSL_get_early_data_reason %xdefine _SSL_get_error _ %+ BORINGSSL_PREFIX %+ _SSL_get_error %xdefine _SSL_get_ex_data _ %+ 
BORINGSSL_PREFIX %+ _SSL_get_ex_data %xdefine _SSL_get_ex_data_X509_STORE_CTX_idx _ %+ BORINGSSL_PREFIX %+ _SSL_get_ex_data_X509_STORE_CTX_idx @@ -2849,6 +2856,8 @@ %xdefine _ecp_nistz256_sqr_mont _ %+ BORINGSSL_PREFIX %+ _ecp_nistz256_sqr_mont %xdefine _ed25519_asn1_meth _ %+ BORINGSSL_PREFIX %+ _ed25519_asn1_meth %xdefine _ed25519_pkey_meth _ %+ BORINGSSL_PREFIX %+ _ed25519_pkey_meth +%xdefine _eval_3_isog _ %+ BORINGSSL_PREFIX %+ _eval_3_isog +%xdefine _eval_4_isog _ %+ BORINGSSL_PREFIX %+ _eval_4_isog %xdefine _gcm_ghash_4bit _ %+ BORINGSSL_PREFIX %+ _gcm_ghash_4bit %xdefine _gcm_ghash_avx _ %+ BORINGSSL_PREFIX %+ _gcm_ghash_avx %xdefine _gcm_ghash_clmul _ %+ BORINGSSL_PREFIX %+ _gcm_ghash_clmul @@ -2861,6 +2870,9 @@ %xdefine _gcm_init_avx _ %+ BORINGSSL_PREFIX %+ _gcm_init_avx %xdefine _gcm_init_clmul _ %+ BORINGSSL_PREFIX %+ _gcm_init_clmul %xdefine _gcm_init_ssse3 _ %+ BORINGSSL_PREFIX %+ _gcm_init_ssse3 +%xdefine _get_3_isog _ %+ BORINGSSL_PREFIX %+ _get_3_isog +%xdefine _get_4_isog _ %+ BORINGSSL_PREFIX %+ _get_4_isog +%xdefine _get_A _ %+ BORINGSSL_PREFIX %+ _get_A %xdefine _i2a_ACCESS_DESCRIPTION _ %+ BORINGSSL_PREFIX %+ _i2a_ACCESS_DESCRIPTION %xdefine _i2a_ASN1_ENUMERATED _ %+ BORINGSSL_PREFIX %+ _i2a_ASN1_ENUMERATED %xdefine _i2a_ASN1_INTEGER _ %+ BORINGSSL_PREFIX %+ _i2a_ASN1_INTEGER @@ -3008,6 +3020,8 @@ %xdefine _i2v_ASN1_BIT_STRING _ %+ BORINGSSL_PREFIX %+ _i2v_ASN1_BIT_STRING %xdefine _i2v_GENERAL_NAME _ %+ BORINGSSL_PREFIX %+ _i2v_GENERAL_NAME %xdefine _i2v_GENERAL_NAMES _ %+ BORINGSSL_PREFIX %+ _i2v_GENERAL_NAMES +%xdefine _inv_3_way _ %+ BORINGSSL_PREFIX %+ _inv_3_way +%xdefine _j_inv _ %+ BORINGSSL_PREFIX %+ _j_inv %xdefine _kBoringSSLRSASqrtTwo _ %+ BORINGSSL_PREFIX %+ _kBoringSSLRSASqrtTwo %xdefine _kBoringSSLRSASqrtTwoLen _ %+ BORINGSSL_PREFIX %+ _kBoringSSLRSASqrtTwoLen %xdefine _kOpenSSLReasonStringData _ %+ BORINGSSL_PREFIX %+ _kOpenSSLReasonStringData @@ -3027,6 +3041,7 @@ %xdefine _md4_block_data_order _ %+ BORINGSSL_PREFIX %+ 
_md4_block_data_order %xdefine _md5_block_asm_data_order _ %+ BORINGSSL_PREFIX %+ _md5_block_asm_data_order %xdefine _o2i_ECPublicKey _ %+ BORINGSSL_PREFIX %+ _o2i_ECPublicKey +%xdefine _p503 _ %+ BORINGSSL_PREFIX %+ _p503 %xdefine _pkcs12_iterations_acceptable _ %+ BORINGSSL_PREFIX %+ _pkcs12_iterations_acceptable %xdefine _pkcs12_key_gen _ %+ BORINGSSL_PREFIX %+ _pkcs12_key_gen %xdefine _pkcs12_pbe_encrypt_init _ %+ BORINGSSL_PREFIX %+ _pkcs12_pbe_encrypt_init @@ -3060,6 +3075,23 @@ %xdefine _sha1_block_data_order _ %+ BORINGSSL_PREFIX %+ _sha1_block_data_order %xdefine _sha256_block_data_order _ %+ BORINGSSL_PREFIX %+ _sha256_block_data_order %xdefine _sha512_block_data_order _ %+ BORINGSSL_PREFIX %+ _sha512_block_data_order +%xdefine _sike_cswap_asm _ %+ BORINGSSL_PREFIX %+ _sike_cswap_asm +%xdefine _sike_fp2inv_mont _ %+ BORINGSSL_PREFIX %+ _sike_fp2inv_mont +%xdefine _sike_fp2mul_mont _ %+ BORINGSSL_PREFIX %+ _sike_fp2mul_mont +%xdefine _sike_fp2sqr_mont _ %+ BORINGSSL_PREFIX %+ _sike_fp2sqr_mont +%xdefine _sike_fpadd _ %+ BORINGSSL_PREFIX %+ _sike_fpadd +%xdefine _sike_fpcopy _ %+ BORINGSSL_PREFIX %+ _sike_fpcopy +%xdefine _sike_fpcorrection _ %+ BORINGSSL_PREFIX %+ _sike_fpcorrection +%xdefine _sike_fpdiv2 _ %+ BORINGSSL_PREFIX %+ _sike_fpdiv2 +%xdefine _sike_fpmul_mont _ %+ BORINGSSL_PREFIX %+ _sike_fpmul_mont +%xdefine _sike_fpneg _ %+ BORINGSSL_PREFIX %+ _sike_fpneg +%xdefine _sike_fprdc _ %+ BORINGSSL_PREFIX %+ _sike_fprdc +%xdefine _sike_fpsub _ %+ BORINGSSL_PREFIX %+ _sike_fpsub +%xdefine _sike_from_mont _ %+ BORINGSSL_PREFIX %+ _sike_from_mont +%xdefine _sike_mpadd_asm _ %+ BORINGSSL_PREFIX %+ _sike_mpadd_asm +%xdefine _sike_mpdblsubx2_asm _ %+ BORINGSSL_PREFIX %+ _sike_mpdblsubx2_asm +%xdefine _sike_mpmul _ %+ BORINGSSL_PREFIX %+ _sike_mpmul +%xdefine _sike_mpsubx2_asm _ %+ BORINGSSL_PREFIX %+ _sike_mpsubx2_asm %xdefine _sk_CRYPTO_BUFFER_call_copy_func _ %+ BORINGSSL_PREFIX %+ _sk_CRYPTO_BUFFER_call_copy_func %xdefine 
_sk_CRYPTO_BUFFER_call_free_func _ %+ BORINGSSL_PREFIX %+ _sk_CRYPTO_BUFFER_call_free_func %xdefine _sk_CRYPTO_BUFFER_deep_copy _ %+ BORINGSSL_PREFIX %+ _sk_CRYPTO_BUFFER_deep_copy @@ -3174,7 +3206,12 @@ %xdefine _x509_rsa_pss_to_ctx _ %+ BORINGSSL_PREFIX %+ _x509_rsa_pss_to_ctx %xdefine _x509v3_bytes_to_hex _ %+ BORINGSSL_PREFIX %+ _x509v3_bytes_to_hex %xdefine _x509v3_hex_to_bytes _ %+ BORINGSSL_PREFIX %+ _x509v3_hex_to_bytes +%xdefine _x509v3_looks_like_dns_name _ %+ BORINGSSL_PREFIX %+ _x509v3_looks_like_dns_name %xdefine _x509v3_name_cmp _ %+ BORINGSSL_PREFIX %+ _x509v3_name_cmp +%xdefine _xDBLADD _ %+ BORINGSSL_PREFIX %+ _xDBLADD +%xdefine _xDBLe _ %+ BORINGSSL_PREFIX %+ _xDBLe +%xdefine _xTPL _ %+ BORINGSSL_PREFIX %+ _xTPL +%xdefine _xTPLe _ %+ BORINGSSL_PREFIX %+ _xTPLe %else %xdefine ACCESS_DESCRIPTION_free BORINGSSL_PREFIX %+ _ACCESS_DESCRIPTION_free %xdefine ACCESS_DESCRIPTION_it BORINGSSL_PREFIX %+ _ACCESS_DESCRIPTION_it @@ -4067,6 +4104,7 @@ %xdefine EVP_DecryptUpdate BORINGSSL_PREFIX %+ _EVP_DecryptUpdate %xdefine EVP_Digest BORINGSSL_PREFIX %+ _EVP_Digest %xdefine EVP_DigestFinal BORINGSSL_PREFIX %+ _EVP_DigestFinal +%xdefine EVP_DigestFinalXOF BORINGSSL_PREFIX %+ _EVP_DigestFinalXOF %xdefine EVP_DigestFinal_ex BORINGSSL_PREFIX %+ _EVP_DigestFinal_ex %xdefine EVP_DigestInit BORINGSSL_PREFIX %+ _EVP_DigestInit %xdefine EVP_DigestInit_ex BORINGSSL_PREFIX %+ _EVP_DigestInit_ex @@ -4154,14 +4192,16 @@ %xdefine EVP_PKEY_get1_DSA BORINGSSL_PREFIX %+ _EVP_PKEY_get1_DSA %xdefine EVP_PKEY_get1_EC_KEY BORINGSSL_PREFIX %+ _EVP_PKEY_get1_EC_KEY %xdefine EVP_PKEY_get1_RSA BORINGSSL_PREFIX %+ _EVP_PKEY_get1_RSA +%xdefine EVP_PKEY_get_raw_private_key BORINGSSL_PREFIX %+ _EVP_PKEY_get_raw_private_key +%xdefine EVP_PKEY_get_raw_public_key BORINGSSL_PREFIX %+ _EVP_PKEY_get_raw_public_key %xdefine EVP_PKEY_id BORINGSSL_PREFIX %+ _EVP_PKEY_id %xdefine EVP_PKEY_is_opaque BORINGSSL_PREFIX %+ _EVP_PKEY_is_opaque %xdefine EVP_PKEY_keygen BORINGSSL_PREFIX %+ _EVP_PKEY_keygen 
%xdefine EVP_PKEY_keygen_init BORINGSSL_PREFIX %+ _EVP_PKEY_keygen_init %xdefine EVP_PKEY_missing_parameters BORINGSSL_PREFIX %+ _EVP_PKEY_missing_parameters %xdefine EVP_PKEY_new BORINGSSL_PREFIX %+ _EVP_PKEY_new -%xdefine EVP_PKEY_new_ed25519_private BORINGSSL_PREFIX %+ _EVP_PKEY_new_ed25519_private -%xdefine EVP_PKEY_new_ed25519_public BORINGSSL_PREFIX %+ _EVP_PKEY_new_ed25519_public +%xdefine EVP_PKEY_new_raw_private_key BORINGSSL_PREFIX %+ _EVP_PKEY_new_raw_private_key +%xdefine EVP_PKEY_new_raw_public_key BORINGSSL_PREFIX %+ _EVP_PKEY_new_raw_public_key %xdefine EVP_PKEY_paramgen BORINGSSL_PREFIX %+ _EVP_PKEY_paramgen %xdefine EVP_PKEY_paramgen_init BORINGSSL_PREFIX %+ _EVP_PKEY_paramgen_init %xdefine EVP_PKEY_print_params BORINGSSL_PREFIX %+ _EVP_PKEY_print_params @@ -4661,6 +4701,9 @@ %xdefine SHA512_Init BORINGSSL_PREFIX %+ _SHA512_Init %xdefine SHA512_Transform BORINGSSL_PREFIX %+ _SHA512_Transform %xdefine SHA512_Update BORINGSSL_PREFIX %+ _SHA512_Update +%xdefine SIKE_decaps BORINGSSL_PREFIX %+ _SIKE_decaps +%xdefine SIKE_encaps BORINGSSL_PREFIX %+ _SIKE_encaps +%xdefine SIKE_keypair BORINGSSL_PREFIX %+ _SIKE_keypair %xdefine SPAKE2_CTX_free BORINGSSL_PREFIX %+ _SPAKE2_CTX_free %xdefine SPAKE2_CTX_new BORINGSSL_PREFIX %+ _SPAKE2_CTX_new %xdefine SPAKE2_generate_msg BORINGSSL_PREFIX %+ _SPAKE2_generate_msg @@ -4943,6 +4986,7 @@ %xdefine SSL_get_curve_id BORINGSSL_PREFIX %+ _SSL_get_curve_id %xdefine SSL_get_curve_name BORINGSSL_PREFIX %+ _SSL_get_curve_name %xdefine SSL_get_default_timeout BORINGSSL_PREFIX %+ _SSL_get_default_timeout +%xdefine SSL_get_early_data_reason BORINGSSL_PREFIX %+ _SSL_get_early_data_reason %xdefine SSL_get_error BORINGSSL_PREFIX %+ _SSL_get_error %xdefine SSL_get_ex_data BORINGSSL_PREFIX %+ _SSL_get_ex_data %xdefine SSL_get_ex_data_X509_STORE_CTX_idx BORINGSSL_PREFIX %+ _SSL_get_ex_data_X509_STORE_CTX_idx @@ -6011,6 +6055,8 @@ %xdefine ecp_nistz256_sqr_mont BORINGSSL_PREFIX %+ _ecp_nistz256_sqr_mont %xdefine ed25519_asn1_meth 
BORINGSSL_PREFIX %+ _ed25519_asn1_meth %xdefine ed25519_pkey_meth BORINGSSL_PREFIX %+ _ed25519_pkey_meth +%xdefine eval_3_isog BORINGSSL_PREFIX %+ _eval_3_isog +%xdefine eval_4_isog BORINGSSL_PREFIX %+ _eval_4_isog %xdefine gcm_ghash_4bit BORINGSSL_PREFIX %+ _gcm_ghash_4bit %xdefine gcm_ghash_avx BORINGSSL_PREFIX %+ _gcm_ghash_avx %xdefine gcm_ghash_clmul BORINGSSL_PREFIX %+ _gcm_ghash_clmul @@ -6023,6 +6069,9 @@ %xdefine gcm_init_avx BORINGSSL_PREFIX %+ _gcm_init_avx %xdefine gcm_init_clmul BORINGSSL_PREFIX %+ _gcm_init_clmul %xdefine gcm_init_ssse3 BORINGSSL_PREFIX %+ _gcm_init_ssse3 +%xdefine get_3_isog BORINGSSL_PREFIX %+ _get_3_isog +%xdefine get_4_isog BORINGSSL_PREFIX %+ _get_4_isog +%xdefine get_A BORINGSSL_PREFIX %+ _get_A %xdefine i2a_ACCESS_DESCRIPTION BORINGSSL_PREFIX %+ _i2a_ACCESS_DESCRIPTION %xdefine i2a_ASN1_ENUMERATED BORINGSSL_PREFIX %+ _i2a_ASN1_ENUMERATED %xdefine i2a_ASN1_INTEGER BORINGSSL_PREFIX %+ _i2a_ASN1_INTEGER @@ -6170,6 +6219,8 @@ %xdefine i2v_ASN1_BIT_STRING BORINGSSL_PREFIX %+ _i2v_ASN1_BIT_STRING %xdefine i2v_GENERAL_NAME BORINGSSL_PREFIX %+ _i2v_GENERAL_NAME %xdefine i2v_GENERAL_NAMES BORINGSSL_PREFIX %+ _i2v_GENERAL_NAMES +%xdefine inv_3_way BORINGSSL_PREFIX %+ _inv_3_way +%xdefine j_inv BORINGSSL_PREFIX %+ _j_inv %xdefine kBoringSSLRSASqrtTwo BORINGSSL_PREFIX %+ _kBoringSSLRSASqrtTwo %xdefine kBoringSSLRSASqrtTwoLen BORINGSSL_PREFIX %+ _kBoringSSLRSASqrtTwoLen %xdefine kOpenSSLReasonStringData BORINGSSL_PREFIX %+ _kOpenSSLReasonStringData @@ -6189,6 +6240,7 @@ %xdefine md4_block_data_order BORINGSSL_PREFIX %+ _md4_block_data_order %xdefine md5_block_asm_data_order BORINGSSL_PREFIX %+ _md5_block_asm_data_order %xdefine o2i_ECPublicKey BORINGSSL_PREFIX %+ _o2i_ECPublicKey +%xdefine p503 BORINGSSL_PREFIX %+ _p503 %xdefine pkcs12_iterations_acceptable BORINGSSL_PREFIX %+ _pkcs12_iterations_acceptable %xdefine pkcs12_key_gen BORINGSSL_PREFIX %+ _pkcs12_key_gen %xdefine pkcs12_pbe_encrypt_init BORINGSSL_PREFIX %+ 
_pkcs12_pbe_encrypt_init @@ -6222,6 +6274,23 @@ %xdefine sha1_block_data_order BORINGSSL_PREFIX %+ _sha1_block_data_order %xdefine sha256_block_data_order BORINGSSL_PREFIX %+ _sha256_block_data_order %xdefine sha512_block_data_order BORINGSSL_PREFIX %+ _sha512_block_data_order +%xdefine sike_cswap_asm BORINGSSL_PREFIX %+ _sike_cswap_asm +%xdefine sike_fp2inv_mont BORINGSSL_PREFIX %+ _sike_fp2inv_mont +%xdefine sike_fp2mul_mont BORINGSSL_PREFIX %+ _sike_fp2mul_mont +%xdefine sike_fp2sqr_mont BORINGSSL_PREFIX %+ _sike_fp2sqr_mont +%xdefine sike_fpadd BORINGSSL_PREFIX %+ _sike_fpadd +%xdefine sike_fpcopy BORINGSSL_PREFIX %+ _sike_fpcopy +%xdefine sike_fpcorrection BORINGSSL_PREFIX %+ _sike_fpcorrection +%xdefine sike_fpdiv2 BORINGSSL_PREFIX %+ _sike_fpdiv2 +%xdefine sike_fpmul_mont BORINGSSL_PREFIX %+ _sike_fpmul_mont +%xdefine sike_fpneg BORINGSSL_PREFIX %+ _sike_fpneg +%xdefine sike_fprdc BORINGSSL_PREFIX %+ _sike_fprdc +%xdefine sike_fpsub BORINGSSL_PREFIX %+ _sike_fpsub +%xdefine sike_from_mont BORINGSSL_PREFIX %+ _sike_from_mont +%xdefine sike_mpadd_asm BORINGSSL_PREFIX %+ _sike_mpadd_asm +%xdefine sike_mpdblsubx2_asm BORINGSSL_PREFIX %+ _sike_mpdblsubx2_asm +%xdefine sike_mpmul BORINGSSL_PREFIX %+ _sike_mpmul +%xdefine sike_mpsubx2_asm BORINGSSL_PREFIX %+ _sike_mpsubx2_asm %xdefine sk_CRYPTO_BUFFER_call_copy_func BORINGSSL_PREFIX %+ _sk_CRYPTO_BUFFER_call_copy_func %xdefine sk_CRYPTO_BUFFER_call_free_func BORINGSSL_PREFIX %+ _sk_CRYPTO_BUFFER_call_free_func %xdefine sk_CRYPTO_BUFFER_deep_copy BORINGSSL_PREFIX %+ _sk_CRYPTO_BUFFER_deep_copy @@ -6336,5 +6405,10 @@ %xdefine x509_rsa_pss_to_ctx BORINGSSL_PREFIX %+ _x509_rsa_pss_to_ctx %xdefine x509v3_bytes_to_hex BORINGSSL_PREFIX %+ _x509v3_bytes_to_hex %xdefine x509v3_hex_to_bytes BORINGSSL_PREFIX %+ _x509v3_hex_to_bytes +%xdefine x509v3_looks_like_dns_name BORINGSSL_PREFIX %+ _x509v3_looks_like_dns_name %xdefine x509v3_name_cmp BORINGSSL_PREFIX %+ _x509v3_name_cmp +%xdefine xDBLADD BORINGSSL_PREFIX %+ _xDBLADD 
+%xdefine xDBLe BORINGSSL_PREFIX %+ _xDBLe +%xdefine xTPL BORINGSSL_PREFIX %+ _xTPL +%xdefine xTPLe BORINGSSL_PREFIX %+ _xTPLe %endif diff --git a/Sources/CNIOBoringSSL/ssl/d1_both.cc b/Sources/CNIOBoringSSL/ssl/d1_both.cc index 7a35f967..dea5f655 100644 --- a/Sources/CNIOBoringSSL/ssl/d1_both.cc +++ b/Sources/CNIOBoringSSL/ssl/d1_both.cc @@ -405,7 +405,7 @@ ssl_open_record_t dtls1_open_handshake(SSL *ssl, size_t *out_consumed, return ssl_open_record_success; } -bool dtls1_get_message(SSL *ssl, SSLMessage *out) { +bool dtls1_get_message(const SSL *ssl, SSLMessage *out) { if (!dtls1_is_current_message_complete(ssl)) { return false; } diff --git a/Sources/CNIOBoringSSL/ssl/handoff.cc b/Sources/CNIOBoringSSL/ssl/handoff.cc index 1772f59a..6239045a 100644 --- a/Sources/CNIOBoringSSL/ssl/handoff.cc +++ b/Sources/CNIOBoringSSL/ssl/handoff.cc @@ -49,7 +49,8 @@ static bool serialize_features(CBB *out) { return CBB_flush(out); } -bool SSL_serialize_handoff(const SSL *ssl, CBB *out) { +bool SSL_serialize_handoff(const SSL *ssl, CBB *out, + SSL_CLIENT_HELLO *out_hello) { const SSL3_STATE *const s3 = ssl->s3; if (!ssl->server || s3->hs == nullptr || @@ -58,6 +59,7 @@ bool SSL_serialize_handoff(const SSL *ssl, CBB *out) { } CBB seq; + SSLMessage msg; Span transcript = s3->hs->transcript.buffer(); if (!CBB_add_asn1(out, &seq, CBS_ASN1_SEQUENCE) || !CBB_add_asn1_uint64(&seq, kHandoffVersion) || @@ -66,7 +68,9 @@ bool SSL_serialize_handoff(const SSL *ssl, CBB *out) { reinterpret_cast(s3->hs_buf->data), s3->hs_buf->length) || !serialize_features(&seq) || - !CBB_flush(out)) { + !CBB_flush(out) || + !ssl->method->get_message(ssl, &msg) || + !ssl_client_hello_init(ssl, out_hello, msg)) { return false; } @@ -446,6 +450,10 @@ bool SSL_apply_handback(SSL *ssl, Span handback) { s3->aead_write_ctx->SetVersionIfNullCipher(ssl->version); s3->hs->cert_request = cert_request; + // TODO(davidben): When handoff for TLS 1.3 is added, serialize + // |early_data_reason| and stabilize the constants. 
+ s3->early_data_reason = ssl_early_data_protocol_version; + Array key_block; if ((type == handback_after_session_resumption || type == handback_after_handshake) && diff --git a/Sources/CNIOBoringSSL/ssl/handshake.cc b/Sources/CNIOBoringSSL/ssl/handshake.cc index 094023f6..68d29901 100644 --- a/Sources/CNIOBoringSSL/ssl/handshake.cc +++ b/Sources/CNIOBoringSSL/ssl/handshake.cc @@ -648,6 +648,7 @@ int ssl_run_handshake(SSL_HANDSHAKE *hs, bool *out_early_return) { return -1; case ssl_hs_early_data_rejected: + assert(ssl->s3->early_data_reason != ssl_early_data_unknown); ssl->s3->rwstate = SSL_EARLY_DATA_REJECTED; // Cause |SSL_write| to start failing immediately. hs->can_early_write = false; diff --git a/Sources/CNIOBoringSSL/ssl/handshake_server.cc b/Sources/CNIOBoringSSL/ssl/handshake_server.cc index ea0928f6..6c4b15f1 100644 --- a/Sources/CNIOBoringSSL/ssl/handshake_server.cc +++ b/Sources/CNIOBoringSSL/ssl/handshake_server.cc @@ -515,10 +515,6 @@ static enum ssl_hs_wait_t do_read_client_hello(SSL_HANDSHAKE *hs) { return ssl_hs_error; } - if (hs->config->handoff) { - return ssl_hs_handoff; - } - SSL_CLIENT_HELLO client_hello; if (!ssl_client_hello_init(ssl, &client_hello, msg)) { OPENSSL_PUT_ERROR(SSL, SSL_R_DECODE_ERROR); @@ -526,6 +522,10 @@ static enum ssl_hs_wait_t do_read_client_hello(SSL_HANDSHAKE *hs) { return ssl_hs_error; } + if (hs->config->handoff) { + return ssl_hs_handoff; + } + // Run the early callback. 
if (ssl->ctx->select_certificate_cb != NULL) { switch (ssl->ctx->select_certificate_cb(&client_hello)) { @@ -635,6 +635,8 @@ static enum ssl_hs_wait_t do_select_certificate(SSL_HANDSHAKE *hs) { return ssl_hs_ok; } + ssl->s3->early_data_reason = ssl_early_data_protocol_version; + SSL_CLIENT_HELLO client_hello; if (!ssl_client_hello_init(ssl, &client_hello, msg)) { return ssl_hs_error; diff --git a/Sources/CNIOBoringSSL/ssl/internal.h b/Sources/CNIOBoringSSL/ssl/internal.h index 380fa9d0..a5c2729c 100644 --- a/Sources/CNIOBoringSSL/ssl/internal.h +++ b/Sources/CNIOBoringSSL/ssl/internal.h @@ -1081,7 +1081,7 @@ void dtls_clear_outgoing_messages(SSL *ssl); void ssl_do_info_callback(const SSL *ssl, int type, int value); // ssl_do_msg_callback calls |ssl|'s message callback, if set. -void ssl_do_msg_callback(SSL *ssl, int is_write, int content_type, +void ssl_do_msg_callback(const SSL *ssl, int is_write, int content_type, Span in); @@ -1798,7 +1798,7 @@ int ssl_log_secret(const SSL *ssl, const char *label, const uint8_t *secret, // ClientHello functions. -bool ssl_client_hello_init(SSL *ssl, SSL_CLIENT_HELLO *out, +bool ssl_client_hello_init(const SSL *ssl, SSL_CLIENT_HELLO *out, const SSLMessage &msg); bool ssl_client_hello_get_extension(const SSL_CLIENT_HELLO *client_hello, @@ -1958,7 +1958,7 @@ struct SSL_PROTOCOL_METHOD { void (*ssl_free)(SSL *ssl); // get_message sets |*out| to the current handshake message and returns true // if one has been received. It returns false if more input is needed. - bool (*get_message)(SSL *ssl, SSLMessage *out); + bool (*get_message)(const SSL *ssl, SSLMessage *out); // next_message is called to release the current handshake message. void (*next_message)(SSL *ssl); // Use the |ssl_open_handshake| wrapper. @@ -2029,7 +2029,7 @@ struct SSL_X509_METHOD { // check_client_CA_list returns one if |names| is a good list of X.509 // distinguished names and zero otherwise. 
This is used to ensure that we can // reject unparsable values at handshake time when using crypto/x509. - int (*check_client_CA_list)(STACK_OF(CRYPTO_BUFFER) *names); + bool (*check_client_CA_list)(STACK_OF(CRYPTO_BUFFER) *names); // cert_clear frees and NULLs all X509 certificate-related state. void (*cert_clear)(CERT *cert); @@ -2046,35 +2046,35 @@ struct SSL_X509_METHOD { // session_cache_objects fills out |sess->x509_peer| and |sess->x509_chain| // from |sess->certs| and erases |sess->x509_chain_without_leaf|. It returns - // one on success or zero on error. - int (*session_cache_objects)(SSL_SESSION *session); + // true on success or false on error. + bool (*session_cache_objects)(SSL_SESSION *session); // session_dup duplicates any needed fields from |session| to |new_session|. - // It returns one on success or zero on error. - int (*session_dup)(SSL_SESSION *new_session, const SSL_SESSION *session); + // It returns true on success or false on error. + bool (*session_dup)(SSL_SESSION *new_session, const SSL_SESSION *session); // session_clear frees any X509-related state from |session|. void (*session_clear)(SSL_SESSION *session); // session_verify_cert_chain verifies the certificate chain in |session|, - // sets |session->verify_result| and returns one on success or zero on + // sets |session->verify_result| and returns true on success or false on // error. - int (*session_verify_cert_chain)(SSL_SESSION *session, SSL_HANDSHAKE *ssl, - uint8_t *out_alert); + bool (*session_verify_cert_chain)(SSL_SESSION *session, SSL_HANDSHAKE *ssl, + uint8_t *out_alert); // hs_flush_cached_ca_names drops any cached |X509_NAME|s from |hs|. void (*hs_flush_cached_ca_names)(SSL_HANDSHAKE *hs); - // ssl_new does any neccessary initialisation of |hs|. It returns one on - // success or zero on error. - int (*ssl_new)(SSL_HANDSHAKE *hs); + // ssl_new does any necessary initialisation of |hs|. It returns true on + // success or false on error. 
+ bool (*ssl_new)(SSL_HANDSHAKE *hs); // ssl_free frees anything created by |ssl_new|. void (*ssl_config_free)(SSL_CONFIG *cfg); // ssl_flush_cached_client_CA drops any cached |X509_NAME|s from |ssl|. void (*ssl_flush_cached_client_CA)(SSL_CONFIG *cfg); // ssl_auto_chain_if_needed runs the deprecated auto-chaining logic if // necessary. On success, it updates |ssl|'s certificate configuration as - // needed and returns one. Otherwise, it returns zero. - int (*ssl_auto_chain_if_needed)(SSL_HANDSHAKE *hs); - // ssl_ctx_new does any neccessary initialisation of |ctx|. It returns one on - // success or zero on error. - int (*ssl_ctx_new)(SSL_CTX *ctx); + // needed and returns true. Otherwise, it returns false. + bool (*ssl_auto_chain_if_needed)(SSL_HANDSHAKE *hs); + // ssl_ctx_new does any necessary initialisation of |ctx|. It returns true on + // success or false on error. + bool (*ssl_ctx_new)(SSL_CTX *ctx); // ssl_ctx_free frees anything created by |ssl_ctx_new|. void (*ssl_ctx_free)(SSL_CTX *ctx); // ssl_ctx_flush_cached_client_CA drops any cached |X509_NAME|s from |ctx|. @@ -2266,6 +2266,9 @@ struct SSL3_STATE { // which resumed a session. int32_t ticket_age_skew = 0; + // ssl_early_data_reason stores details on why 0-RTT was accepted or rejected. + enum ssl_early_data_reason_t early_data_reason = ssl_early_data_unknown; + // aead_read_ctx is the current read cipher state. 
UniquePtr aead_read_ctx; @@ -2674,8 +2677,9 @@ void ssl_session_renew_timeout(SSL *ssl, SSL_SESSION *session, void ssl_update_cache(SSL_HANDSHAKE *hs, int mode); -int ssl_send_alert(SSL *ssl, int level, int desc); -bool ssl3_get_message(SSL *ssl, SSLMessage *out); +void ssl_send_alert(SSL *ssl, int level, int desc); +int ssl_send_alert_impl(SSL *ssl, int level, int desc); +bool ssl3_get_message(const SSL *ssl, SSLMessage *out); ssl_open_record_t ssl3_open_handshake(SSL *ssl, size_t *out_consumed, uint8_t *out_alert, Span in); void ssl3_next_message(SSL *ssl); @@ -2741,7 +2745,7 @@ unsigned int dtls1_min_mtu(void); bool dtls1_new(SSL *ssl); void dtls1_free(SSL *ssl); -bool dtls1_get_message(SSL *ssl, SSLMessage *out); +bool dtls1_get_message(const SSL *ssl, SSLMessage *out); ssl_open_record_t dtls1_open_handshake(SSL *ssl, size_t *out_consumed, uint8_t *out_alert, Span in); void dtls1_next_message(SSL *ssl); diff --git a/Sources/CNIOBoringSSL/ssl/s3_both.cc b/Sources/CNIOBoringSSL/ssl/s3_both.cc index ff685467..598a1590 100644 --- a/Sources/CNIOBoringSSL/ssl/s3_both.cc +++ b/Sources/CNIOBoringSSL/ssl/s3_both.cc @@ -494,7 +494,7 @@ static bool parse_message(const SSL *ssl, SSLMessage *out, return true; } -bool ssl3_get_message(SSL *ssl, SSLMessage *out) { +bool ssl3_get_message(const SSL *ssl, SSLMessage *out) { size_t unused; if (!parse_message(ssl, out, &unused)) { return false; diff --git a/Sources/CNIOBoringSSL/ssl/s3_pkt.cc b/Sources/CNIOBoringSSL/ssl/s3_pkt.cc index 6110e9d9..727ab965 100644 --- a/Sources/CNIOBoringSSL/ssl/s3_pkt.cc +++ b/Sources/CNIOBoringSSL/ssl/s3_pkt.cc @@ -118,6 +118,7 @@ #include #include +#include "../crypto/err/internal.h" #include "../crypto/internal.h" #include "internal.h" @@ -381,7 +382,24 @@ ssl_open_record_t ssl3_open_change_cipher_spec(SSL *ssl, size_t *out_consumed, return ssl_open_record_success; } -int ssl_send_alert(SSL *ssl, int level, int desc) { +void ssl_send_alert(SSL *ssl, int level, int desc) { + // This function is 
called in response to a fatal error from the peer. Ignore + // any failures writing the alert and report only the original error. In + // particular, if the transport uses |SSL_write|, our existing error will be + // clobbered so we must save and restore the error queue. See + // https://crbug.com/959305. + // + // TODO(davidben): Return the alert out of the handshake, rather than calling + // this function internally everywhere. + // + // TODO(davidben): This does not allow retrying if the alert hit EAGAIN. See + // https://crbug.com/boringssl/130. + UniquePtr err_state(ERR_save_state()); + ssl_send_alert_impl(ssl, level, desc); + ERR_restore_state(err_state.get()); +} + +int ssl_send_alert_impl(SSL *ssl, int level, int desc) { // It is illegal to send an alert when we've already sent a closing one. if (ssl->s3->write_shutdown != ssl_shutdown_none) { OPENSSL_PUT_ERROR(SSL, SSL_R_PROTOCOL_IS_SHUTDOWN); diff --git a/Sources/CNIOBoringSSL/ssl/ssl_lib.cc b/Sources/CNIOBoringSSL/ssl/ssl_lib.cc index bd0743ea..bd94e584 100644 --- a/Sources/CNIOBoringSSL/ssl/ssl_lib.cc +++ b/Sources/CNIOBoringSSL/ssl/ssl_lib.cc @@ -378,7 +378,7 @@ void ssl_do_info_callback(const SSL *ssl, int type, int value) { } } -void ssl_do_msg_callback(SSL *ssl, int is_write, int content_type, +void ssl_do_msg_callback(const SSL *ssl, int is_write, int content_type, Span in) { if (ssl->msg_callback == NULL) { return; @@ -399,8 +399,8 @@ void ssl_do_msg_callback(SSL *ssl, int is_write, int content_type, version = SSL_version(ssl); } - ssl->msg_callback(is_write, version, content_type, in.data(), in.size(), ssl, - ssl->msg_callback_arg); + ssl->msg_callback(is_write, version, content_type, in.data(), in.size(), + const_cast(ssl), ssl->msg_callback_arg); } void ssl_get_current_time(const SSL *ssl, struct OPENSSL_timeval *out_clock) { @@ -1195,7 +1195,7 @@ int SSL_shutdown(SSL *ssl) { if (ssl->s3->write_shutdown != ssl_shutdown_close_notify) { // Send a close_notify. 
- if (ssl_send_alert(ssl, SSL3_AL_WARNING, SSL_AD_CLOSE_NOTIFY) <= 0) { + if (ssl_send_alert_impl(ssl, SSL3_AL_WARNING, SSL_AD_CLOSE_NOTIFY) <= 0) { return -1; } } else if (ssl->s3->alert_dispatch) { @@ -1242,7 +1242,7 @@ int SSL_send_fatal_alert(SSL *ssl, uint8_t alert) { return ssl->method->dispatch_alert(ssl); } - return ssl_send_alert(ssl, SSL3_AL_FATAL, alert); + return ssl_send_alert_impl(ssl, SSL3_AL_FATAL, alert); } int SSL_set_quic_transport_params(SSL *ssl, const uint8_t *params, @@ -1294,6 +1294,10 @@ void SSL_reset_early_data_reject(SSL *ssl) { ssl->s3->wpend_pending = false; } +enum ssl_early_data_reason_t SSL_get_early_data_reason(const SSL *ssl) { + return ssl->s3->early_data_reason; +} + static int bio_retry_reason_to_error(int reason) { switch (reason) { case BIO_RR_CONNECT: diff --git a/Sources/CNIOBoringSSL/ssl/ssl_x509.cc b/Sources/CNIOBoringSSL/ssl/ssl_x509.cc index 05a108f6..fb3312b4 100644 --- a/Sources/CNIOBoringSSL/ssl/ssl_x509.cc +++ b/Sources/CNIOBoringSSL/ssl/ssl_x509.cc @@ -200,19 +200,19 @@ static UniquePtr new_leafless_chain(void) { // forms of elements of |chain|. It returns one on success or zero on error, in // which case no change to |cert->chain| is made. It preverses the existing // leaf from |cert->chain|, if any. -static int ssl_cert_set_chain(CERT *cert, STACK_OF(X509) *chain) { +static bool ssl_cert_set_chain(CERT *cert, STACK_OF(X509) *chain) { UniquePtr new_chain; if (cert->chain != nullptr) { new_chain.reset(sk_CRYPTO_BUFFER_new_null()); if (!new_chain) { - return 0; + return false; } // |leaf| might be NULL if it's a “leafless” chain. 
CRYPTO_BUFFER *leaf = sk_CRYPTO_BUFFER_value(cert->chain.get(), 0); if (!PushToStack(new_chain.get(), UpRef(leaf))) { - return 0; + return false; } } @@ -220,32 +220,32 @@ static int ssl_cert_set_chain(CERT *cert, STACK_OF(X509) *chain) { if (!new_chain) { new_chain = new_leafless_chain(); if (!new_chain) { - return 0; + return false; } } UniquePtr buffer = x509_to_buffer(x509); if (!buffer || !PushToStack(new_chain.get(), std::move(buffer))) { - return 0; + return false; } } cert->chain = std::move(new_chain); - return 1; + return true; } static void ssl_crypto_x509_cert_flush_cached_leaf(CERT *cert) { X509_free(cert->x509_leaf); - cert->x509_leaf = NULL; + cert->x509_leaf = nullptr; } static void ssl_crypto_x509_cert_flush_cached_chain(CERT *cert) { sk_X509_pop_free(cert->x509_chain, X509_free); - cert->x509_chain = NULL; + cert->x509_chain = nullptr; } -static int ssl_crypto_x509_check_client_CA_list( +static bool ssl_crypto_x509_check_client_CA_list( STACK_OF(CRYPTO_BUFFER) *names) { for (const CRYPTO_BUFFER *buffer : names) { const uint8_t *inp = CRYPTO_BUFFER_data(buffer); @@ -253,11 +253,11 @@ static int ssl_crypto_x509_check_client_CA_list( d2i_X509_NAME(nullptr, &inp, CRYPTO_BUFFER_len(buffer))); if (name == nullptr || inp != CRYPTO_BUFFER_data(buffer) + CRYPTO_BUFFER_len(buffer)) { - return 0; + return false; } } - return 1; + return true; } static void ssl_crypto_x509_cert_clear(CERT *cert) { @@ -265,7 +265,7 @@ static void ssl_crypto_x509_cert_clear(CERT *cert) { ssl_crypto_x509_cert_flush_cached_chain(cert); X509_free(cert->x509_stash); - cert->x509_stash = NULL; + cert->x509_stash = nullptr; } static void ssl_crypto_x509_cert_free(CERT *cert) { @@ -274,19 +274,19 @@ static void ssl_crypto_x509_cert_free(CERT *cert) { } static void ssl_crypto_x509_cert_dup(CERT *new_cert, const CERT *cert) { - if (cert->verify_store != NULL) { + if (cert->verify_store != nullptr) { X509_STORE_up_ref(cert->verify_store); new_cert->verify_store = cert->verify_store; } } 
-static int ssl_crypto_x509_session_cache_objects(SSL_SESSION *sess) { +static bool ssl_crypto_x509_session_cache_objects(SSL_SESSION *sess) { bssl::UniquePtr chain, chain_without_leaf; if (sk_CRYPTO_BUFFER_num(sess->certs.get()) > 0) { chain.reset(sk_X509_new_null()); if (!chain) { OPENSSL_PUT_ERROR(SSL, ERR_R_MALLOC_FAILURE); - return 0; + return false; } if (sess->is_server) { // chain_without_leaf is only needed for server sessions. See @@ -294,7 +294,7 @@ static int ssl_crypto_x509_session_cache_objects(SSL_SESSION *sess) { chain_without_leaf.reset(sk_X509_new_null()); if (!chain_without_leaf) { OPENSSL_PUT_ERROR(SSL, ERR_R_MALLOC_FAILURE); - return 0; + return false; } } } @@ -304,18 +304,18 @@ static int ssl_crypto_x509_session_cache_objects(SSL_SESSION *sess) { UniquePtr x509(X509_parse_from_buffer(cert)); if (!x509) { OPENSSL_PUT_ERROR(SSL, SSL_R_DECODE_ERROR); - return 0; + return false; } if (leaf == nullptr) { leaf = UpRef(x509); } else if (chain_without_leaf && !PushToStack(chain_without_leaf.get(), UpRef(x509))) { OPENSSL_PUT_ERROR(SSL, ERR_R_MALLOC_FAILURE); - return 0; + return false; } if (!PushToStack(chain.get(), std::move(x509))) { OPENSSL_PUT_ERROR(SSL, ERR_R_MALLOC_FAILURE); - return 0; + return false; } } @@ -327,80 +327,76 @@ static int ssl_crypto_x509_session_cache_objects(SSL_SESSION *sess) { X509_free(sess->x509_peer); sess->x509_peer = leaf.release(); - return 1; + return true; } -static int ssl_crypto_x509_session_dup(SSL_SESSION *new_session, - const SSL_SESSION *session) { +static bool ssl_crypto_x509_session_dup(SSL_SESSION *new_session, + const SSL_SESSION *session) { new_session->x509_peer = UpRef(session->x509_peer).release(); if (session->x509_chain != nullptr) { new_session->x509_chain = X509_chain_up_ref(session->x509_chain); if (new_session->x509_chain == nullptr) { - return 0; + return false; } } if (session->x509_chain_without_leaf != nullptr) { new_session->x509_chain_without_leaf = 
X509_chain_up_ref(session->x509_chain_without_leaf); if (new_session->x509_chain_without_leaf == nullptr) { - return 0; + return false; } } - return 1; + return true; } static void ssl_crypto_x509_session_clear(SSL_SESSION *session) { X509_free(session->x509_peer); - session->x509_peer = NULL; + session->x509_peer = nullptr; sk_X509_pop_free(session->x509_chain, X509_free); - session->x509_chain = NULL; + session->x509_chain = nullptr; sk_X509_pop_free(session->x509_chain_without_leaf, X509_free); - session->x509_chain_without_leaf = NULL; + session->x509_chain_without_leaf = nullptr; } -static int ssl_crypto_x509_session_verify_cert_chain(SSL_SESSION *session, - SSL_HANDSHAKE *hs, - uint8_t *out_alert) { +static bool ssl_crypto_x509_session_verify_cert_chain(SSL_SESSION *session, + SSL_HANDSHAKE *hs, + uint8_t *out_alert) { *out_alert = SSL_AD_INTERNAL_ERROR; STACK_OF(X509) *const cert_chain = session->x509_chain; - if (cert_chain == NULL || sk_X509_num(cert_chain) == 0) { - return 0; + if (cert_chain == nullptr || sk_X509_num(cert_chain) == 0) { + return false; } SSL_CTX *ssl_ctx = hs->ssl->ctx.get(); X509_STORE *verify_store = ssl_ctx->cert_store; - if (hs->config->cert->verify_store != NULL) { + if (hs->config->cert->verify_store != nullptr) { verify_store = hs->config->cert->verify_store; } X509 *leaf = sk_X509_value(cert_chain, 0); ScopedX509_STORE_CTX ctx; - if (!X509_STORE_CTX_init(ctx.get(), verify_store, leaf, cert_chain)) { + if (!X509_STORE_CTX_init(ctx.get(), verify_store, leaf, cert_chain) || + !X509_STORE_CTX_set_ex_data( + ctx.get(), SSL_get_ex_data_X509_STORE_CTX_idx(), hs->ssl) || + // We need to inherit the verify parameters. These can be determined by + // the context: if its a server it will verify SSL client certificates or + // vice versa. + !X509_STORE_CTX_set_default( + ctx.get(), hs->ssl->server ? "ssl_client" : "ssl_server") || + // Anything non-default in "param" should overwrite anything in the ctx. 
+ !X509_VERIFY_PARAM_set1(X509_STORE_CTX_get0_param(ctx.get()), + hs->config->param)) { OPENSSL_PUT_ERROR(SSL, ERR_R_X509_LIB); - return 0; - } - if (!X509_STORE_CTX_set_ex_data( - ctx.get(), SSL_get_ex_data_X509_STORE_CTX_idx(), hs->ssl)) { - return 0; + return false; } - // We need to inherit the verify parameters. These can be determined by the - // context: if its a server it will verify SSL client certificates or vice - // versa. - X509_STORE_CTX_set_default(ctx.get(), - hs->ssl->server ? "ssl_client" : "ssl_server"); - - // Anything non-default in "param" should overwrite anything in the ctx. - X509_VERIFY_PARAM_set1(X509_STORE_CTX_get0_param(ctx.get()), - hs->config->param); - if (hs->config->verify_callback) { X509_STORE_CTX_set_verify_cb(ctx.get(), hs->config->verify_callback); } int verify_ret; - if (ssl_ctx->app_verify_callback != NULL) { + if (ssl_ctx->app_verify_callback != nullptr) { verify_ret = ssl_ctx->app_verify_callback(ctx.get(), ssl_ctx->app_verify_arg); } else { @@ -412,59 +408,59 @@ static int ssl_crypto_x509_session_verify_cert_chain(SSL_SESSION *session, // If |SSL_VERIFY_NONE|, the error is non-fatal, but we keep the result. 
if (verify_ret <= 0 && hs->config->verify_mode != SSL_VERIFY_NONE) { *out_alert = SSL_alert_from_verify_result(ctx->error); - return 0; + return false; } ERR_clear_error(); - return 1; + return true; } static void ssl_crypto_x509_hs_flush_cached_ca_names(SSL_HANDSHAKE *hs) { sk_X509_NAME_pop_free(hs->cached_x509_ca_names, X509_NAME_free); - hs->cached_x509_ca_names = NULL; + hs->cached_x509_ca_names = nullptr; } -static int ssl_crypto_x509_ssl_new(SSL_HANDSHAKE *hs) { +static bool ssl_crypto_x509_ssl_new(SSL_HANDSHAKE *hs) { hs->config->param = X509_VERIFY_PARAM_new(); - if (hs->config->param == NULL) { - return 0; + if (hs->config->param == nullptr) { + return false; } X509_VERIFY_PARAM_inherit(hs->config->param, hs->ssl->ctx->param); - return 1; + return true; } static void ssl_crypto_x509_ssl_flush_cached_client_CA(SSL_CONFIG *cfg) { sk_X509_NAME_pop_free(cfg->cached_x509_client_CA, X509_NAME_free); - cfg->cached_x509_client_CA = NULL; + cfg->cached_x509_client_CA = nullptr; } static void ssl_crypto_x509_ssl_config_free(SSL_CONFIG *cfg) { sk_X509_NAME_pop_free(cfg->cached_x509_client_CA, X509_NAME_free); - cfg->cached_x509_client_CA = NULL; + cfg->cached_x509_client_CA = nullptr; X509_VERIFY_PARAM_free(cfg->param); } -static int ssl_crypto_x509_ssl_auto_chain_if_needed(SSL_HANDSHAKE *hs) { +static bool ssl_crypto_x509_ssl_auto_chain_if_needed(SSL_HANDSHAKE *hs) { // Only build a chain if there are no intermediates configured and the feature // isn't disabled. 
if ((hs->ssl->mode & SSL_MODE_NO_AUTO_CHAIN) || !ssl_has_certificate(hs) || hs->config->cert->chain == NULL || sk_CRYPTO_BUFFER_num(hs->config->cert->chain.get()) > 1) { - return 1; + return true; } UniquePtr leaf(X509_parse_from_buffer( sk_CRYPTO_BUFFER_value(hs->config->cert->chain.get(), 0))); if (!leaf) { OPENSSL_PUT_ERROR(SSL, ERR_R_X509_LIB); - return 0; + return false; } ScopedX509_STORE_CTX ctx; if (!X509_STORE_CTX_init(ctx.get(), hs->ssl->ctx->cert_store, leaf.get(), NULL)) { OPENSSL_PUT_ERROR(SSL, ERR_R_X509_LIB); - return 0; + return false; } // Attempt to build a chain, ignoring the result. @@ -475,23 +471,23 @@ static int ssl_crypto_x509_ssl_auto_chain_if_needed(SSL_HANDSHAKE *hs) { X509_free(sk_X509_shift(ctx->chain)); if (!ssl_cert_set_chain(hs->config->cert.get(), ctx->chain)) { - return 0; + return false; } ssl_crypto_x509_cert_flush_cached_chain(hs->config->cert.get()); - return 1; + return true; } static void ssl_crypto_x509_ssl_ctx_flush_cached_client_CA(SSL_CTX *ctx) { sk_X509_NAME_pop_free(ctx->cached_x509_client_CA, X509_NAME_free); - ctx->cached_x509_client_CA = NULL; + ctx->cached_x509_client_CA = nullptr; } -static int ssl_crypto_x509_ssl_ctx_new(SSL_CTX *ctx) { +static bool ssl_crypto_x509_ssl_ctx_new(SSL_CTX *ctx) { ctx->cert_store = X509_STORE_new(); ctx->param = X509_VERIFY_PARAM_new(); - return (ctx->cert_store != NULL && ctx->param != NULL); + return (ctx->cert_store != nullptr && ctx->param != nullptr); } static void ssl_crypto_x509_ssl_ctx_free(SSL_CTX *ctx) { diff --git a/Sources/CNIOBoringSSL/ssl/t1_lib.cc b/Sources/CNIOBoringSSL/ssl/t1_lib.cc index 8b58b077..3fa678e9 100644 --- a/Sources/CNIOBoringSSL/ssl/t1_lib.cc +++ b/Sources/CNIOBoringSSL/ssl/t1_lib.cc @@ -199,10 +199,10 @@ static bool tls1_check_duplicate_extensions(const CBS *cbs) { return true; } -bool ssl_client_hello_init(SSL *ssl, SSL_CLIENT_HELLO *out, +bool ssl_client_hello_init(const SSL *ssl, SSL_CLIENT_HELLO *out, const SSLMessage &msg) { OPENSSL_memset(out, 0, 
sizeof(*out)); - out->ssl = ssl; + out->ssl = const_cast(ssl); out->client_hello = CBS_data(&msg.body); out->client_hello_len = CBS_len(&msg.body); @@ -1790,7 +1790,7 @@ static bool ext_ec_point_add_extension(SSL_HANDSHAKE *hs, CBB *out) { } static bool ext_ec_point_add_clienthello(SSL_HANDSHAKE *hs, CBB *out) { - // The point format extension is unneccessary in TLS 1.3. + // The point format extension is unnecessary in TLS 1.3. if (hs->min_version >= TLS1_3_VERSION) { return true; } @@ -2057,20 +2057,46 @@ static bool ext_psk_key_exchange_modes_parse_clienthello(SSL_HANDSHAKE *hs, static bool ext_early_data_add_clienthello(SSL_HANDSHAKE *hs, CBB *out) { SSL *const ssl = hs->ssl; - if (!ssl->enable_early_data || - // Session must be 0-RTT capable. - ssl->session == nullptr || - ssl_session_protocol_version(ssl->session.get()) < TLS1_3_VERSION || - ssl->session->ticket_max_early_data == 0 || - // The second ClientHello never offers early data. - hs->received_hello_retry_request || - // In case ALPN preferences changed since this session was established, - // avoid reporting a confusing value in |SSL_get0_alpn_selected|. - (!ssl->session->early_alpn.empty() && - !ssl_is_alpn_protocol_allowed(hs, ssl->session->early_alpn))) { + // The second ClientHello never offers early data, and we must have already + // filled in |early_data_reason| by this point. + if (hs->received_hello_retry_request) { + assert(ssl->s3->early_data_reason != ssl_early_data_unknown); + return true; + } + + if (!ssl->enable_early_data) { + ssl->s3->early_data_reason = ssl_early_data_disabled; + return true; + } + + if (hs->max_version < TLS1_3_VERSION) { + // We discard inapplicable sessions, so this is redundant with the session + // checks below, but we check give a more useful reason. 
+ ssl->s3->early_data_reason = ssl_early_data_protocol_version; + return true; + } + + if (ssl->session == nullptr) { + ssl->s3->early_data_reason = ssl_early_data_no_session_offered; return true; } + if (ssl_session_protocol_version(ssl->session.get()) < TLS1_3_VERSION || + ssl->session->ticket_max_early_data == 0) { + ssl->s3->early_data_reason = ssl_early_data_unsupported_for_session; + return true; + } + + // In case ALPN preferences changed since this session was established, avoid + // reporting a confusing value in |SSL_get0_alpn_selected| and sending early + // data we know will be rejected. + if (!ssl->session->early_alpn.empty() && + !ssl_is_alpn_protocol_allowed(hs, ssl->session->early_alpn)) { + ssl->s3->early_data_reason = ssl_early_data_alpn_mismatch; + return true; + } + + // |early_data_reason| will be filled in later when the server responds. hs->early_data_offered = true; if (!CBB_add_u16(out, TLSEXT_TYPE_early_data) || @@ -2083,12 +2109,27 @@ static bool ext_early_data_add_clienthello(SSL_HANDSHAKE *hs, CBB *out) { } static bool ext_early_data_parse_serverhello(SSL_HANDSHAKE *hs, - uint8_t *out_alert, CBS *contents) { + uint8_t *out_alert, + CBS *contents) { SSL *const ssl = hs->ssl; if (contents == NULL) { + if (hs->early_data_offered && !hs->received_hello_retry_request) { + ssl->s3->early_data_reason = ssl->s3->session_reused + ? ssl_early_data_peer_declined + : ssl_early_data_session_not_resumed; + } else { + // We already filled in |early_data_reason| when declining to offer 0-RTT + // or handling the implicit HelloRetryRequest reject. + assert(ssl->s3->early_data_reason != ssl_early_data_unknown); + } return true; } + // If we received an HRR, the second ClientHello never offers early data, so + // the extensions logic will automatically reject early data extensions as + // unsolicited. This covered by the ServerAcceptsEarlyDataOnHRR test. 
+ assert(!hs->received_hello_retry_request); + if (CBS_len(contents) != 0) { *out_alert = SSL_AD_DECODE_ERROR; return false; @@ -2100,6 +2141,7 @@ static bool ext_early_data_parse_serverhello(SSL_HANDSHAKE *hs, return false; } + ssl->s3->early_data_reason = ssl_early_data_accepted; ssl->s3->early_data_accepted = true; return true; } @@ -3061,6 +3103,9 @@ bool ssl_add_clienthello_tlsext(SSL_HANDSHAKE *hs, CBB *out, return false; } + // Note we may send multiple ClientHellos for DTLS HelloVerifyRequest and TLS + // 1.3 HelloRetryRequest. For the latter, the extensions may change, so it is + // important to reset this value. hs->extensions.sent = 0; for (size_t i = 0; i < kNumExtensions; i++) { diff --git a/Sources/CNIOBoringSSL/ssl/tls13_both.cc b/Sources/CNIOBoringSSL/ssl/tls13_both.cc index dc9bd84c..d68191e4 100644 --- a/Sources/CNIOBoringSSL/ssl/tls13_both.cc +++ b/Sources/CNIOBoringSSL/ssl/tls13_both.cc @@ -488,10 +488,10 @@ bool tls13_add_certificate(SSL_HANDSHAKE *hs) { if (ssl_signing_with_dc(hs)) { const CRYPTO_BUFFER *raw = dc->raw.get(); + CBB child; if (!CBB_add_u16(&extensions, TLSEXT_TYPE_delegated_credential) || - !CBB_add_u16(&extensions, CRYPTO_BUFFER_len(raw)) || - !CBB_add_bytes(&extensions, - CRYPTO_BUFFER_data(raw), + !CBB_add_u16_length_prefixed(&extensions, &child) || + !CBB_add_bytes(&child, CRYPTO_BUFFER_data(raw), CRYPTO_BUFFER_len(raw)) || !CBB_flush(&extensions)) { OPENSSL_PUT_ERROR(SSL, ERR_R_INTERNAL_ERROR); diff --git a/Sources/CNIOBoringSSL/ssl/tls13_client.cc b/Sources/CNIOBoringSSL/ssl/tls13_client.cc index 2389e2b0..afd3a9aa 100644 --- a/Sources/CNIOBoringSSL/ssl/tls13_client.cc +++ b/Sources/CNIOBoringSSL/ssl/tls13_client.cc @@ -188,6 +188,7 @@ static enum ssl_hs_wait_t do_read_hello_retry_request(SSL_HANDSHAKE *hs) { hs->tls13_state = state_send_second_client_hello; // 0-RTT is rejected if we receive a HelloRetryRequest. 
if (hs->in_early_data) { + ssl->s3->early_data_reason = ssl_early_data_hello_retry_request; return ssl_hs_early_data_rejected; } return ssl_hs_ok; diff --git a/Sources/CNIOBoringSSL/ssl/tls13_server.cc b/Sources/CNIOBoringSSL/ssl/tls13_server.cc index 8d2b6a70..743c1cf7 100644 --- a/Sources/CNIOBoringSSL/ssl/tls13_server.cc +++ b/Sources/CNIOBoringSSL/ssl/tls13_server.cc @@ -53,6 +53,12 @@ enum server_hs_state_t { static const uint8_t kZeroes[EVP_MAX_MD_SIZE] = {0}; +// Allow a minute of ticket age skew in either direction. This covers +// transmission delays in ClientHello and NewSessionTicket, as well as +// drift between client and server clock rate since the ticket was issued. +// See RFC 8446, section 8.3. +static const int32_t kMaxTicketAgeSkewSeconds = 60; + static int resolve_ecdhe_secret(SSL_HANDSHAKE *hs, bool *out_need_retry, SSL_CLIENT_HELLO *client_hello) { SSL *const ssl = hs->ssl; @@ -307,16 +313,15 @@ static enum ssl_hs_wait_t do_select_parameters(SSL_HANDSHAKE *hs) { static enum ssl_ticket_aead_result_t select_session( SSL_HANDSHAKE *hs, uint8_t *out_alert, UniquePtr *out_session, - int32_t *out_ticket_age_skew, const SSLMessage &msg, - const SSL_CLIENT_HELLO *client_hello) { + int32_t *out_ticket_age_skew, bool *out_offered_ticket, + const SSLMessage &msg, const SSL_CLIENT_HELLO *client_hello) { SSL *const ssl = hs->ssl; - *out_session = NULL; + *out_session = nullptr; - // Decode the ticket if we agreed on a PSK key exchange mode. CBS pre_shared_key; - if (!hs->accept_psk_mode || - !ssl_client_hello_get_extension(client_hello, &pre_shared_key, - TLSEXT_TYPE_pre_shared_key)) { + *out_offered_ticket = ssl_client_hello_get_extension( + client_hello, &pre_shared_key, TLSEXT_TYPE_pre_shared_key); + if (!*out_offered_ticket) { return ssl_ticket_aead_ignore_ticket; } @@ -337,6 +342,11 @@ static enum ssl_ticket_aead_result_t select_session( return ssl_ticket_aead_error; } + // If the peer did not offer psk_dhe, ignore the resumption. 
+ if (!hs->accept_psk_mode) { + return ssl_ticket_aead_ignore_ticket; + } + // TLS 1.3 session tickets are renewed separately as part of the // NewSessionTicket. bool unused_renew; @@ -406,10 +416,18 @@ static enum ssl_hs_wait_t do_select_session(SSL_HANDSHAKE *hs) { uint8_t alert = SSL_AD_DECODE_ERROR; UniquePtr session; - switch (select_session(hs, &alert, &session, &ssl->s3->ticket_age_skew, msg, - &client_hello)) { + bool offered_ticket = false; + switch (select_session(hs, &alert, &session, &ssl->s3->ticket_age_skew, + &offered_ticket, msg, &client_hello)) { case ssl_ticket_aead_ignore_ticket: assert(!session); + if (!ssl->enable_early_data) { + ssl->s3->early_data_reason = ssl_early_data_disabled; + } else if (!offered_ticket) { + ssl->s3->early_data_reason = ssl_early_data_no_session_offered; + } else { + ssl->s3->early_data_reason = ssl_early_data_session_not_resumed; + } if (!ssl_get_new_session(hs, 1 /* server */)) { ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_INTERNAL_ERROR); return ssl_hs_error; @@ -421,26 +439,34 @@ static enum ssl_hs_wait_t do_select_session(SSL_HANDSHAKE *hs) { // a fresh session. hs->new_session = SSL_SESSION_dup(session.get(), SSL_SESSION_DUP_AUTH_ONLY); + if (hs->new_session == nullptr) { + ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_INTERNAL_ERROR); + return ssl_hs_error; + } - if (ssl->enable_early_data && - // Early data must be acceptable for this ticket. - session->ticket_max_early_data != 0 && - // The client must have offered early data. - hs->early_data_offered && + if (!ssl->enable_early_data) { + ssl->s3->early_data_reason = ssl_early_data_disabled; + } else if (session->ticket_max_early_data == 0) { + ssl->s3->early_data_reason = ssl_early_data_unsupported_for_session; + } else if (!hs->early_data_offered) { + ssl->s3->early_data_reason = ssl_early_data_peer_declined; + } else if (ssl->s3->channel_id_valid) { // Channel ID is incompatible with 0-RTT. 
- !ssl->s3->channel_id_valid && - // If Token Binding is negotiated, reject 0-RTT. - !ssl->s3->token_binding_negotiated && - // The negotiated ALPN must match the one in the ticket. - MakeConstSpan(ssl->s3->alpn_selected) == session->early_alpn) { + ssl->s3->early_data_reason = ssl_early_data_channel_id; + } else if (ssl->s3->token_binding_negotiated) { + // Token Binding is incompatible with 0-RTT. + ssl->s3->early_data_reason = ssl_early_data_token_binding; + } else if (MakeConstSpan(ssl->s3->alpn_selected) != session->early_alpn) { + // The negotiated ALPN must match the one in the ticket. + ssl->s3->early_data_reason = ssl_early_data_alpn_mismatch; + } else if (ssl->s3->ticket_age_skew < -kMaxTicketAgeSkewSeconds || + kMaxTicketAgeSkewSeconds < ssl->s3->ticket_age_skew) { + ssl->s3->early_data_reason = ssl_early_data_ticket_age_skew; + } else { + ssl->s3->early_data_reason = ssl_early_data_accepted; ssl->s3->early_data_accepted = true; } - if (hs->new_session == NULL) { - ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_INTERNAL_ERROR); - return ssl_hs_error; - } - ssl->s3->session_reused = true; // Resumption incorporates fresh key material, so refresh the timeout. @@ -499,7 +525,10 @@ static enum ssl_hs_wait_t do_select_session(SSL_HANDSHAKE *hs) { bool need_retry; if (!resolve_ecdhe_secret(hs, &need_retry, &client_hello)) { if (need_retry) { - ssl->s3->early_data_accepted = false; + if (ssl->s3->early_data_accepted) { + ssl->s3->early_data_reason = ssl_early_data_hello_retry_request; + ssl->s3->early_data_accepted = false; + } ssl->s3->skip_early_data = true; ssl->method->next_message(ssl); if (!hs->transcript.UpdateForHelloRetryRequest()) { @@ -950,7 +979,15 @@ static enum ssl_hs_wait_t do_send_new_session_ticket(SSL_HANDSHAKE *hs) { } hs->tls13_state = state_done; - return sent_tickets ? 
ssl_hs_flush : ssl_hs_ok; + // In TLS 1.3, the NewSessionTicket isn't flushed until the server performs a + // write, to prevent a non-reading client from causing the server to hang in + // the case of a small server write buffer. Consumers which don't write data + // to the client will need to do a zero-byte write if they wish to flush the + // tickets. + if (hs->ssl->ctx->quic_method != nullptr && sent_tickets) { + return ssl_hs_flush; + } + return ssl_hs_ok; } enum ssl_hs_wait_t tls13_server_handshake(SSL_HANDSHAKE *hs) { diff --git a/Sources/CNIOBoringSSL/ssl/tls_method.cc b/Sources/CNIOBoringSSL/ssl/tls_method.cc index 843f6375..ea6cc8be 100644 --- a/Sources/CNIOBoringSSL/ssl/tls_method.cc +++ b/Sources/CNIOBoringSSL/ssl/tls_method.cc @@ -125,9 +125,9 @@ static const SSL_PROTOCOL_METHOD kTLSProtocolMethod = { ssl3_set_write_state, }; -static int ssl_noop_x509_check_client_CA_names( +static bool ssl_noop_x509_check_client_CA_names( STACK_OF(CRYPTO_BUFFER) *names) { - return 1; + return true; } static void ssl_noop_x509_clear(CERT *cert) {} @@ -135,29 +135,29 @@ static void ssl_noop_x509_free(CERT *cert) {} static void ssl_noop_x509_dup(CERT *new_cert, const CERT *cert) {} static void ssl_noop_x509_flush_cached_leaf(CERT *cert) {} static void ssl_noop_x509_flush_cached_chain(CERT *cert) {} -static int ssl_noop_x509_session_cache_objects(SSL_SESSION *sess) { - return 1; +static bool ssl_noop_x509_session_cache_objects(SSL_SESSION *sess) { + return true; } -static int ssl_noop_x509_session_dup(SSL_SESSION *new_session, - const SSL_SESSION *session) { - return 1; +static bool ssl_noop_x509_session_dup(SSL_SESSION *new_session, + const SSL_SESSION *session) { + return true; } static void ssl_noop_x509_session_clear(SSL_SESSION *session) {} -static int ssl_noop_x509_session_verify_cert_chain(SSL_SESSION *session, - SSL_HANDSHAKE *hs, - uint8_t *out_alert) { - return 0; +static bool ssl_noop_x509_session_verify_cert_chain(SSL_SESSION *session, + SSL_HANDSHAKE *hs, + 
uint8_t *out_alert) { + return false; } static void ssl_noop_x509_hs_flush_cached_ca_names(SSL_HANDSHAKE *hs) {} -static int ssl_noop_x509_ssl_new(SSL_HANDSHAKE *hs) { return 1; } +static bool ssl_noop_x509_ssl_new(SSL_HANDSHAKE *hs) { return true; } static void ssl_noop_x509_ssl_config_free(SSL_CONFIG *cfg) {} static void ssl_noop_x509_ssl_flush_cached_client_CA(SSL_CONFIG *cfg) {} -static int ssl_noop_x509_ssl_auto_chain_if_needed(SSL_HANDSHAKE *hs) { - return 1; +static bool ssl_noop_x509_ssl_auto_chain_if_needed(SSL_HANDSHAKE *hs) { + return true; } -static int ssl_noop_x509_ssl_ctx_new(SSL_CTX *ctx) { return 1; } -static void ssl_noop_x509_ssl_ctx_free(SSL_CTX *ctx) { } +static bool ssl_noop_x509_ssl_ctx_new(SSL_CTX *ctx) { return true; } +static void ssl_noop_x509_ssl_ctx_free(SSL_CTX *ctx) {} static void ssl_noop_x509_ssl_ctx_flush_cached_client_CA(SSL_CTX *ctx) {} const SSL_X509_METHOD ssl_noop_x509_method = { diff --git a/Sources/CNIOBoringSSL/third_party/sike/P503.c b/Sources/CNIOBoringSSL/third_party/sike/P503.c new file mode 100644 index 00000000..b8463e76 --- /dev/null +++ b/Sources/CNIOBoringSSL/third_party/sike/P503.c @@ -0,0 +1,100 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: supersingular isogeny parameters and generation of functions for P503 +*********************************************************************************************/ + +#include "utils.h" + +// Parameters for isogeny system "SIKEp503" +const struct params_t p503 = { + .prime = { + U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), + U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0xABFFFFFFFFFFFFFF), + U64_TO_WORDS(0x13085BDA2211E7A0), U64_TO_WORDS(0x1B9BF6C87B7E7DAF), + U64_TO_WORDS(0x6045C6BDDA77A4D0), U64_TO_WORDS(0x004066F541811E1E) + }, + .prime_p1 = { + U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0x0000000000000000), + 
U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0xAC00000000000000), + U64_TO_WORDS(0x13085BDA2211E7A0), U64_TO_WORDS(0x1B9BF6C87B7E7DAF), + U64_TO_WORDS(0x6045C6BDDA77A4D0), U64_TO_WORDS(0x004066F541811E1E) + }, + .prime_x2 = { + U64_TO_WORDS(0xFFFFFFFFFFFFFFFE), U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), + U64_TO_WORDS(0xFFFFFFFFFFFFFFFF), U64_TO_WORDS(0x57FFFFFFFFFFFFFF), + U64_TO_WORDS(0x2610B7B44423CF41), U64_TO_WORDS(0x3737ED90F6FCFB5E), + U64_TO_WORDS(0xC08B8D7BB4EF49A0), U64_TO_WORDS(0x0080CDEA83023C3C) + }, + .A_gen = { + U64_TO_WORDS(0xE7EF4AA786D855AF), U64_TO_WORDS(0xED5758F03EB34D3B), + U64_TO_WORDS(0x09AE172535A86AA9), U64_TO_WORDS(0x237B9CC07D622723), + U64_TO_WORDS(0xE3A284CBA4E7932D), U64_TO_WORDS(0x27481D9176C5E63F), + U64_TO_WORDS(0x6A323FF55C6E71BF), U64_TO_WORDS(0x002ECC31A6FB8773), // XPA0 + U64_TO_WORDS(0x64D02E4E90A620B8), U64_TO_WORDS(0xDAB8128537D4B9F1), + U64_TO_WORDS(0x4BADF77B8A228F98), U64_TO_WORDS(0x0F5DBDF9D1FB7D1B), + U64_TO_WORDS(0xBEC4DB288E1A0DCC), U64_TO_WORDS(0xE76A8665E80675DB), + U64_TO_WORDS(0x6D6F252E12929463), U64_TO_WORDS(0x003188BD1463FACC), // XPA1 + U64_TO_WORDS(0xB79D41025DE85D56), U64_TO_WORDS(0x0B867DA9DF169686), + U64_TO_WORDS(0x740E5368021C827D), U64_TO_WORDS(0x20615D72157BF25C), + U64_TO_WORDS(0xFF1590013C9B9F5B), U64_TO_WORDS(0xC884DCADE8C16CEA), + U64_TO_WORDS(0xEBD05E53BF724E01), U64_TO_WORDS(0x0032FEF8FDA5748C), // XQA0 + U64_TO_WORDS(0x12E2E849AA0A8006), U64_TO_WORDS(0x41CF47008635A1E8), + U64_TO_WORDS(0x9CD720A70798AED7), U64_TO_WORDS(0x42A820B42FCF04CF), + U64_TO_WORDS(0x7BF9BAD32AAE88B1), U64_TO_WORDS(0xF619127A54090BBE), + U64_TO_WORDS(0x1CB10D8F56408EAA), U64_TO_WORDS(0x001D6B54C3C0EDEB), // XRA0 + U64_TO_WORDS(0x34DB54931CBAAC36), U64_TO_WORDS(0x420A18CB8DD5F0C4), + U64_TO_WORDS(0x32008C1A48C0F44D), U64_TO_WORDS(0x3B3BA772B1CFD44D), + U64_TO_WORDS(0xA74B058FDAF13515), U64_TO_WORDS(0x095FC9CA7EEC17B4), + U64_TO_WORDS(0x448E829D28F120F8), U64_TO_WORDS(0x00261EC3ED16A489) // XRA1 + }, + .B_gen = { + 
U64_TO_WORDS(0x7EDE37F4FA0BC727), U64_TO_WORDS(0xF7F8EC5C8598941C), + U64_TO_WORDS(0xD15519B516B5F5C8), U64_TO_WORDS(0xF6D5AC9B87A36282), + U64_TO_WORDS(0x7B19F105B30E952E), U64_TO_WORDS(0x13BD8B2025B4EBEE), + U64_TO_WORDS(0x7B96D27F4EC579A2), U64_TO_WORDS(0x00140850CAB7E5DE), // XPB0 + U64_TO_WORDS(0x7764909DAE7B7B2D), U64_TO_WORDS(0x578ABB16284911AB), + U64_TO_WORDS(0x76E2BFD146A6BF4D), U64_TO_WORDS(0x4824044B23AA02F0), + U64_TO_WORDS(0x1105048912A321F3), U64_TO_WORDS(0xB8A2E482CF0F10C1), + U64_TO_WORDS(0x42FF7D0BE2152085), U64_TO_WORDS(0x0018E599C5223352), // XPB1 + U64_TO_WORDS(0x4256C520FB388820), U64_TO_WORDS(0x744FD7C3BAAF0A13), + U64_TO_WORDS(0x4B6A2DDDB12CBCB8), U64_TO_WORDS(0xE46826E27F427DF8), + U64_TO_WORDS(0xFE4A663CD505A61B), U64_TO_WORDS(0xD6B3A1BAF025C695), + U64_TO_WORDS(0x7C3BB62B8FCC00BD), U64_TO_WORDS(0x003AFDDE4A35746C), // XQB0 + U64_TO_WORDS(0x75601CD1E6C0DFCB), U64_TO_WORDS(0x1A9007239B58F93E), + U64_TO_WORDS(0xC1F1BE80C62107AC), U64_TO_WORDS(0x7F513B898F29FF08), + U64_TO_WORDS(0xEA0BEDFF43E1F7B2), U64_TO_WORDS(0x2C6D94018CBAE6D0), + U64_TO_WORDS(0x3A430D31BCD84672), U64_TO_WORDS(0x000D26892ECCFE83), // XRB0 + U64_TO_WORDS(0x1119D62AEA3007A1), U64_TO_WORDS(0xE3702AA4E04BAE1B), + U64_TO_WORDS(0x9AB96F7D59F990E7), U64_TO_WORDS(0xF58440E8B43319C0), + U64_TO_WORDS(0xAF8134BEE1489775), U64_TO_WORDS(0xE7F7774E905192AA), + U64_TO_WORDS(0xF54AE09308E98039), U64_TO_WORDS(0x001EF7A041A86112) // XRB1 + }, + .mont_R2 = { + U64_TO_WORDS(0x5289A0CF641D011F), U64_TO_WORDS(0x9B88257189FED2B9), + U64_TO_WORDS(0xA3B365D58DC8F17A), U64_TO_WORDS(0x5BC57AB6EFF168EC), + U64_TO_WORDS(0x9E51998BD84D4423), U64_TO_WORDS(0xBF8999CBAC3B5695), + U64_TO_WORDS(0x46E9127BCE14CDB6), U64_TO_WORDS(0x003F6CFCE8B81771) + }, + .mont_one = { + U64_TO_WORDS(0x00000000000003F9), U64_TO_WORDS(0x0000000000000000), + U64_TO_WORDS(0x0000000000000000), U64_TO_WORDS(0xB400000000000000), + U64_TO_WORDS(0x63CB1A6EA6DED2B4), U64_TO_WORDS(0x51689D8D667EB37D), + 
U64_TO_WORDS(0x8ACD77C71AB24142), U64_TO_WORDS(0x0026FBAEC60F5953) + }, + .A_strat = { + 61, 32, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, + 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, + 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 29, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, + 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 13, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, + 1, 1, 2, 1, 1, 5, 4, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1 + }, + .B_strat = { + 71, 38, 21, 13, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 5, 4, 2, 1, 1, 2, 1, + 1, 2, 1, 1, 1, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 17, 9, + 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 1, 2, 1, + 1, 4, 2, 1, 1, 2, 1, 1, 33, 17, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, + 2, 1, 1, 8, 4, 2, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 1, 2, + 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1 + } +}; diff --git a/Sources/CNIOBoringSSL/third_party/sike/asm/fp_generic.c b/Sources/CNIOBoringSSL/third_party/sike/asm/fp_generic.c new file mode 100644 index 00000000..291d5df1 --- /dev/null +++ b/Sources/CNIOBoringSSL/third_party/sike/asm/fp_generic.c @@ -0,0 +1,181 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: portable modular arithmetic for P503 +*********************************************************************************************/ + +#include + +#if defined(OPENSSL_NO_ASM) || \ + (!defined(OPENSSL_X86_64) && !defined(OPENSSL_AARCH64)) + +#include "../utils.h" +#include "../fpx.h" + +// Global constants +extern const struct params_t p503; + +static void digit_x_digit(const crypto_word_t a, const crypto_word_t b, crypto_word_t* c) +{ // Digit multiplication, digit * digit -> 2-digit result + crypto_word_t al, ah, bl, bh, temp; + crypto_word_t albl, 
albh, ahbl, ahbh, res1, res2, res3, carry; + crypto_word_t mask_low = (crypto_word_t)(-1) >> (sizeof(crypto_word_t)*4); + crypto_word_t mask_high = (crypto_word_t)(-1) << (sizeof(crypto_word_t)*4); + + al = a & mask_low; // Low part + ah = a >> (sizeof(crypto_word_t) * 4); // High part + bl = b & mask_low; + bh = b >> (sizeof(crypto_word_t) * 4); + + albl = al*bl; + albh = al*bh; + ahbl = ah*bl; + ahbh = ah*bh; + c[0] = albl & mask_low; // C00 + + res1 = albl >> (sizeof(crypto_word_t) * 4); + res2 = ahbl & mask_low; + res3 = albh & mask_low; + temp = res1 + res2 + res3; + carry = temp >> (sizeof(crypto_word_t) * 4); + c[0] ^= temp << (sizeof(crypto_word_t) * 4); // C01 + + res1 = ahbl >> (sizeof(crypto_word_t) * 4); + res2 = albh >> (sizeof(crypto_word_t) * 4); + res3 = ahbh & mask_low; + temp = res1 + res2 + res3 + carry; + c[1] = temp & mask_low; // C10 + carry = temp & mask_high; + c[1] ^= (ahbh & mask_high) + carry; // C11 +} + +void sike_fpadd(const felm_t a, const felm_t b, felm_t c) +{ // Modular addition, c = a+b mod p503. + // Inputs: a, b in [0, 2*p503-1] + // Output: c in [0, 2*p503-1] + unsigned int i, carry = 0; + crypto_word_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(carry, c[i], p503.prime_x2[i], carry, c[i]); + } + mask = 0 - (crypto_word_t)carry; + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, c[i], p503.prime_x2[i] & mask, carry, c[i]); + } +} + +void sike_fpsub(const felm_t a, const felm_t b, felm_t c) +{ // Modular subtraction, c = a-b mod p503. 
+ // Inputs: a, b in [0, 2*p503-1] + // Output: c in [0, 2*p503-1] + unsigned int i, borrow = 0; + crypto_word_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + mask = 0 - (crypto_word_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], p503.prime_x2[i] & mask, borrow, c[i]); + } +} + +void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c) +{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = NWORDS_FIELD. + unsigned int i, j; + crypto_word_t t = 0, u = 0, v = 0, UV[2]; + unsigned int carry = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + for (j = 0; j <= i; j++) { + MUL(a[j], b[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) { + for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) { + MUL(a[j], b[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + c[2*NWORDS_FIELD-1] = v; +} + +void sike_fprdc(const felm_t ma, felm_t mc) +{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p503. + // mc = ma*R^-1 mod p503x2, where R = 2^512. + // If ma < 2^512*p503, the output mc is in the range [0, 2*p503-1]. + // ma is assumed to be in Montgomery representation. 
+ unsigned int i, j, carry, count = p503_ZERO_WORDS; + crypto_word_t UV[2], t = 0, u = 0, v = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + mc[i] = 0; + } + + for (i = 0; i < NWORDS_FIELD; i++) { + for (j = 0; j < i; j++) { + if (j < (i-p503_ZERO_WORDS+1)) { + MUL(mc[j], p503.prime_p1[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) { + if (count > 0) { + count -= 1; + } + for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) { + if (j < (NWORDS_FIELD-count)) { + MUL(mc[j], p503.prime_p1[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i-NWORDS_FIELD] = v; + v = u; + u = t; + t = 0; + } + ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v); + mc[NWORDS_FIELD-1] = v; +} + +#endif // NO_ASM || (!X86_64 && !AARCH64) diff --git a/Sources/CNIOBoringSSL/third_party/sike/fpx.c b/Sources/CNIOBoringSSL/third_party/sike/fpx.c new file mode 100644 index 00000000..f2127484 --- /dev/null +++ b/Sources/CNIOBoringSSL/third_party/sike/fpx.c @@ -0,0 +1,305 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: core functions over GF(p) and GF(p^2) +*********************************************************************************************/ +#include + +#include "utils.h" +#include "fpx.h" + +extern const struct params_t p503; + +// Multiprecision squaring, c = a^2 mod p. +static void fpsqr_mont(const felm_t ma, felm_t mc) +{ + dfelm_t temp = {0}; + sike_mpmul(ma, ma, temp); + sike_fprdc(temp, mc); +} + +// Chain to compute a^(p-3)/4 using Montgomery arithmetic. 
+static void fpinv_chain_mont(felm_t a) +{ + unsigned int i, j; + felm_t t[15], tt; + + // Precomputed table + fpsqr_mont(a, tt); + sike_fpmul_mont(a, tt, t[0]); + for (i = 0; i <= 13; i++) sike_fpmul_mont(t[i], tt, t[i+1]); + + sike_fpcopy(a, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(a, tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[8], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[6], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[9], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[0], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(a, tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[6], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[2], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[8], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(a, tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[10], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[0], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[10], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[10], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[5], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[2], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[6], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[3], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[5], tt, tt); + for (i = 0; i < 12; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[12], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[8], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[6], tt, tt); + for (i = 0; i 
< 5; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[12], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[11], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[6], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[5], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[14], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[14], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[5], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[6], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[8], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(a, tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[4], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[6], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[5], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[7], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(a, tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[0], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[11], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[13], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[1], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[10], tt, tt); + for (j = 0; j < 49; j++) { + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + sike_fpmul_mont(t[14], tt, tt); + } + sike_fpcopy(tt, a); +} + +// Field inversion using Montgomery arithmetic, a = a^(-1)*R mod p. 
+static void fpinv_mont(felm_t a) +{ + felm_t tt = {0}; + sike_fpcopy(a, tt); + fpinv_chain_mont(tt); + fpsqr_mont(tt, tt); + fpsqr_mont(tt, tt); + sike_fpmul_mont(a, tt, a); +} + +// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit. +#if defined(OPENSSL_NO_ASM) || (!defined(OPENSSL_X86_64) && !defined(OPENSSL_AARCH64)) +inline static unsigned int mp_add(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) { + uint8_t carry = 0; + for (size_t i = 0; i < nwords; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + return carry; +} + +// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit. +inline static unsigned int mp_sub(const felm_t a, const felm_t b, felm_t c, const unsigned int nwords) { + uint32_t borrow = 0; + for (size_t i = 0; i < nwords; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + return borrow; +} +#endif + +// Multiprecision addition, c = a+b. +inline static void mp_addfast(const felm_t a, const felm_t b, felm_t c) +{ +#if defined(OPENSSL_NO_ASM) || (!defined(OPENSSL_X86_64) && !defined(OPENSSL_AARCH64)) + mp_add(a, b, c, NWORDS_FIELD); +#else + sike_mpadd_asm(a, b, c); +#endif +} + +// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD. +// If c < 0 then returns mask = 0xFF..F, else mask = 0x00..0 +inline static crypto_word_t mp_subfast(const dfelm_t a, const dfelm_t b, dfelm_t c) { +#if defined(OPENSSL_NO_ASM) || (!defined(OPENSSL_X86_64) && !defined(OPENSSL_AARCH64)) + return (0 - (crypto_word_t)mp_sub(a, b, c, 2*NWORDS_FIELD)); +#else + return sike_mpsubx2_asm(a, b, c); +#endif +} + +// Multiprecision subtraction, c = c-a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD. +// Inputs should be s.t. 
c > a and c > b +inline static void mp_dblsubfast(const dfelm_t a, const dfelm_t b, dfelm_t c) { +#if defined(OPENSSL_NO_ASM) || (!defined(OPENSSL_X86_64) && !defined(OPENSSL_AARCH64)) + mp_sub(c, a, c, 2*NWORDS_FIELD); + mp_sub(c, b, c, 2*NWORDS_FIELD); +#else + sike_mpdblsubx2_asm(a, b, c); +#endif +} + +// Copy a field element, c = a. +void sike_fpcopy(const felm_t a, felm_t c) { + for (size_t i = 0; i < NWORDS_FIELD; i++) { + c[i] = a[i]; + } +} + +// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p503, where R=2^512 +void sike_fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc) +{ + dfelm_t temp = {0}; + sike_mpmul(ma, mb, temp); + sike_fprdc(temp, mc); +} + +// Conversion from Montgomery representation to standard representation, +// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1]. +void sike_from_mont(const felm_t ma, felm_t c) +{ + felm_t one = {0}; + one[0] = 1; + + sike_fpmul_mont(ma, one, c); + sike_fpcorrection(c); +} + +// GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2). +// Inputs: a = a0+a1*i, where a0, a1 are in [0, 2*p-1] +// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1] +void sike_fp2sqr_mont(const f2elm_t a, f2elm_t c) { + felm_t t1, t2, t3; + + mp_addfast(a->c0, a->c1, t1); // t1 = a0+a1 + sike_fpsub(a->c0, a->c1, t2); // t2 = a0-a1 + mp_addfast(a->c0, a->c0, t3); // t3 = 2a0 + sike_fpmul_mont(t1, t2, c->c0); // c0 = (a0+a1)(a0-a1) + sike_fpmul_mont(t3, a->c1, c->c1); // c1 = 2a0*a1 +} + +// Modular negation, a = -a mod p503. +// Input/output: a in [0, 2*p503-1] +void sike_fpneg(felm_t a) { + uint32_t borrow = 0; + for (size_t i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, p503.prime_x2[i], a[i], borrow, a[i]); + } +} + +// Modular division by two, c = a/2 mod p503. 
+// Input : a in [0, 2*p503-1] +// Output: c in [0, 2*p503-1] +void sike_fpdiv2(const felm_t a, felm_t c) { + uint32_t carry = 0; + crypto_word_t mask; + + mask = 0 - (crypto_word_t)(a[0] & 1); // If a is odd compute a+p503 + for (size_t i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], p503.prime[i] & mask, carry, c[i]); + } + + // Multiprecision right shift by one. + for (size_t i = 0; i < NWORDS_FIELD-1; i++) { + c[i] = (c[i] >> 1) ^ (c[i+1] << (RADIX - 1)); + } + c[NWORDS_FIELD-1] >>= 1; +} + +// Modular correction to reduce field element a in [0, 2*p503-1] to [0, p503-1]. +void sike_fpcorrection(felm_t a) { + uint32_t borrow = 0; + crypto_word_t mask; + + for (size_t i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], p503.prime[i], borrow, a[i]); + } + mask = 0 - (crypto_word_t)borrow; + + borrow = 0; + for (size_t i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, a[i], p503.prime[i] & mask, borrow, a[i]); + } +} + +// GF(p^2) multiplication using Montgomery arithmetic, c = a*b in GF(p^2). +// Inputs: a = a0+a1*i and b = b0+b1*i, where a0, a1, b0, b1 are in [0, 2*p-1] +// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1] +void sike_fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c) { + felm_t t1, t2; + dfelm_t tt1, tt2, tt3; + crypto_word_t mask; + + mp_addfast(a->c0, a->c1, t1); // t1 = a0+a1 + mp_addfast(b->c0, b->c1, t2); // t2 = b0+b1 + sike_mpmul(a->c0, b->c0, tt1); // tt1 = a0*b0 + sike_mpmul(a->c1, b->c1, tt2); // tt2 = a1*b1 + sike_mpmul(t1, t2, tt3); // tt3 = (a0+a1)*(b0+b1) + mp_dblsubfast(tt1, tt2, tt3); // tt3 = (a0+a1)*(b0+b1) - a0*b0 - a1*b1 + mask = mp_subfast(tt1, tt2, tt1); // tt1 = a0*b0 - a1*b1. 
If tt1 < 0 then mask = 0xFF..F, else if tt1 >= 0 then mask = 0x00..0 + + for (size_t i = 0; i < NWORDS_FIELD; i++) { + t1[i] = p503.prime[i] & mask; + } + + sike_fprdc(tt3, c->c1); // c[1] = (a0+a1)*(b0+b1) - a0*b0 - a1*b1 + mp_addfast(&tt1[NWORDS_FIELD], t1, &tt1[NWORDS_FIELD]); + sike_fprdc(tt1, c->c0); // c[0] = a0*b0 - a1*b1 +} + +// GF(p^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2). +void sike_fp2inv_mont(f2elm_t a) { + f2elm_t t1; + + fpsqr_mont(a->c0, t1->c0); // t10 = a0^2 + fpsqr_mont(a->c1, t1->c1); // t11 = a1^2 + sike_fpadd(t1->c0, t1->c1, t1->c0); // t10 = a0^2+a1^2 + fpinv_mont(t1->c0); // t10 = (a0^2+a1^2)^-1 + sike_fpneg(a->c1); // a = a0-i*a1 + sike_fpmul_mont(a->c0, t1->c0, a->c0); + sike_fpmul_mont(a->c1, t1->c0, a->c1); // a = (a0-i*a1)*(a0^2+a1^2)^-1 +} diff --git a/Sources/CNIOBoringSSL/third_party/sike/fpx.h b/Sources/CNIOBoringSSL/third_party/sike/fpx.h new file mode 100644 index 00000000..e787c28c --- /dev/null +++ b/Sources/CNIOBoringSSL/third_party/sike/fpx.h @@ -0,0 +1,112 @@ +#ifndef FPX_H_ +#define FPX_H_ + +#include "utils.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +// Modular addition, c = a+b mod p503. +void sike_fpadd(const felm_t a, const felm_t b, felm_t c); +// Modular subtraction, c = a-b mod p503. +void sike_fpsub(const felm_t a, const felm_t b, felm_t c); +// Modular division by two, c = a/2 mod p503. +void sike_fpdiv2(const felm_t a, felm_t c); +// Modular correction to reduce field element a in [0, 2*p503-1] to [0, p503-1]. +void sike_fpcorrection(felm_t a); +// Multiprecision multiply, c = a*b, where lng(a) = lng(b) = nwords. 
+void sike_mpmul(const felm_t a, const felm_t b, dfelm_t c); +// 503-bit Montgomery reduction, c = a mod p +void sike_fprdc(const dfelm_t a, felm_t c); +// Double 2x503-bit multiprecision subtraction, c = c-a-b +void sike_mpdblsubx2_asm(const felm_t a, const felm_t b, felm_t c); +// Multiprecision subtraction, c = a-b +crypto_word_t sike_mpsubx2_asm(const dfelm_t a, const dfelm_t b, dfelm_t c); +// 503-bit multiprecision addition, c = a+b +void sike_mpadd_asm(const felm_t a, const felm_t b, felm_t c); +// Modular negation, a = -a mod p503. +void sike_fpneg(felm_t a); +// Copy of a field element, c = a +void sike_fpcopy(const felm_t a, felm_t c); +// Zero a field element, a = 0. +void sike_fpzero(felm_t a); +// If option = 0xFF...FF x=y; y=x, otherwise swap doesn't happen. Constant time. +void sike_cswap_asm(point_proj_t x, point_proj_t y, const crypto_word_t option); +// Conversion from Montgomery representation to standard representation, +// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1]. +void sike_from_mont(const felm_t ma, felm_t c); +// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p503, where R=2^512 +void sike_fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc); +// GF(p503^2) multiplication using Montgomery arithmetic, c = a*b in GF(p503^2) +void sike_fp2mul_mont(const f2elm_t a, const f2elm_t b, f2elm_t c); +// GF(p503^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2) +void sike_fp2inv_mont(f2elm_t a); +// GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2). +void sike_fp2sqr_mont(const f2elm_t a, f2elm_t c); +// Modular correction, a = a in GF(p^2). +void sike_fp2correction(f2elm_t a); + +#if defined(__cplusplus) +} // extern C +#endif + +// GF(p^2) addition, c = a+b in GF(p^2). +#define sike_fp2add(a, b, c) \ +do { \ + sike_fpadd(a->c0, b->c0, c->c0); \ + sike_fpadd(a->c1, b->c1, c->c1); \ +} while(0) + +// GF(p^2) subtraction, c = a-b in GF(p^2). 
+#define sike_fp2sub(a,b,c) \ +do { \ + sike_fpsub(a->c0, b->c0, c->c0); \ + sike_fpsub(a->c1, b->c1, c->c1); \ +} while(0) + +// Copy a GF(p^2) element, c = a. +#define sike_fp2copy(a, c) \ +do { \ + sike_fpcopy(a->c0, c->c0); \ + sike_fpcopy(a->c1, c->c1); \ +} while(0) + +// GF(p^2) negation, a = -a in GF(p^2). +#define sike_fp2neg(a) \ +do { \ + sike_fpneg(a->c0); \ + sike_fpneg(a->c1); \ +} while(0) + +// GF(p^2) division by two, c = a/2 in GF(p^2). +#define sike_fp2div2(a, c) \ +do { \ + sike_fpdiv2(a->c0, c->c0); \ + sike_fpdiv2(a->c1, c->c1); \ +} while(0) + +// Modular correction, a = a in GF(p^2). +#define sike_fp2correction(a) \ +do { \ + sike_fpcorrection(a->c0); \ + sike_fpcorrection(a->c1); \ +} while(0) + +// Conversion of a GF(p^2) element to Montgomery representation, +// mc_i = a_i*R^2*R^(-1) = a_i*R in GF(p^2). +#define sike_to_fp2mont(a, mc) \ +do { \ + sike_fpmul_mont(a->c0, p503.mont_R2, mc->c0); \ + sike_fpmul_mont(a->c1, p503.mont_R2, mc->c1); \ +} while(0) + +// Conversion of a GF(p^2) element from Montgomery representation to standard representation, +// c_i = ma_i*R^(-1) = a_i in GF(p^2). 
+#define sike_from_fp2mont(ma, c) \ +do { \ + sike_from_mont(ma->c0, c->c0); \ + sike_from_mont(ma->c1, c->c1); \ +} while(0) + +#endif // FPX_H_ diff --git a/Sources/CNIOBoringSSL/third_party/sike/isogeny.c b/Sources/CNIOBoringSSL/third_party/sike/isogeny.c new file mode 100644 index 00000000..b8807f30 --- /dev/null +++ b/Sources/CNIOBoringSSL/third_party/sike/isogeny.c @@ -0,0 +1,260 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: elliptic curve and isogeny functions +*********************************************************************************************/ +#include "utils.h" +#include "isogeny.h" +#include "fpx.h" + +static void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24) +{ // Doubling of a Montgomery point in projective coordinates (X:Z). + // Input: projective Montgomery x-coordinates P = (X1:Z1), where x1=X1/Z1 and Montgomery curve constants A+2C and 4C. + // Output: projective Montgomery x-coordinates Q = 2*P = (X2:Z2). + f2elm_t t0, t1; + + sike_fp2sub(P->X, P->Z, t0); // t0 = X1-Z1 + sike_fp2add(P->X, P->Z, t1); // t1 = X1+Z1 + sike_fp2sqr_mont(t0, t0); // t0 = (X1-Z1)^2 + sike_fp2sqr_mont(t1, t1); // t1 = (X1+Z1)^2 + sike_fp2mul_mont(C24, t0, Q->Z); // Z2 = C24*(X1-Z1)^2 + sike_fp2mul_mont(t1, Q->Z, Q->X); // X2 = C24*(X1-Z1)^2*(X1+Z1)^2 + sike_fp2sub(t1, t0, t1); // t1 = (X1+Z1)^2-(X1-Z1)^2 + sike_fp2mul_mont(A24plus, t1, t0); // t0 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] + sike_fp2add(Q->Z, t0, Q->Z); // Z2 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2 + sike_fp2mul_mont(Q->Z, t1, Q->Z); // Z2 = [A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2]*[(X1+Z1)^2-(X1-Z1)^2] +} + +void xDBLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24, size_t e) +{ // Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings. 
+ // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constants A+2C and 4C. + // Output: projective Montgomery x-coordinates Q <- (2^e)*P. + + memmove(Q, P, sizeof(*P)); + for (size_t i = 0; i < e; i++) { + xDBL(Q, Q, A24plus, C24); + } +} + +void get_4_isog(const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff) +{ // Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4. + // Input: projective point of order four P = (X4:Z4). + // Output: the 4-isogenous Montgomery curve with projective coefficients A+2C/4C and the 3 coefficients + // that are used to evaluate the isogeny at a point in eval_4_isog(). + + sike_fp2sub(P->X, P->Z, coeff[1]); // coeff[1] = X4-Z4 + sike_fp2add(P->X, P->Z, coeff[2]); // coeff[2] = X4+Z4 + sike_fp2sqr_mont(P->Z, coeff[0]); // coeff[0] = Z4^2 + sike_fp2add(coeff[0], coeff[0], coeff[0]); // coeff[0] = 2*Z4^2 + sike_fp2sqr_mont(coeff[0], C24); // C24 = 4*Z4^4 + sike_fp2add(coeff[0], coeff[0], coeff[0]); // coeff[0] = 4*Z4^2 + sike_fp2sqr_mont(P->X, A24plus); // A24plus = X4^2 + sike_fp2add(A24plus, A24plus, A24plus); // A24plus = 2*X4^2 + sike_fp2sqr_mont(A24plus, A24plus); // A24plus = 4*X4^4 +} + +void eval_4_isog(point_proj_t P, f2elm_t* coeff) +{ // Evaluates the isogeny at the point (X:Z) in the domain of the isogeny, given a 4-isogeny phi defined + // by the 3 coefficients in coeff (computed in the function get_4_isog()). + // Inputs: the coefficients defining the isogeny, and the projective point P = (X:Z). + // Output: the projective point P = phi(P) = (X:Z) in the codomain. 
+ f2elm_t t0, t1; + + sike_fp2add(P->X, P->Z, t0); // t0 = X+Z + sike_fp2sub(P->X, P->Z, t1); // t1 = X-Z + sike_fp2mul_mont(t0, coeff[1], P->X); // X = (X+Z)*coeff[1] + sike_fp2mul_mont(t1, coeff[2], P->Z); // Z = (X-Z)*coeff[2] + sike_fp2mul_mont(t0, t1, t0); // t0 = (X+Z)*(X-Z) + sike_fp2mul_mont(t0, coeff[0], t0); // t0 = coeff[0]*(X+Z)*(X-Z) + sike_fp2add(P->X, P->Z, t1); // t1 = (X-Z)*coeff[2] + (X+Z)*coeff[1] + sike_fp2sub(P->X, P->Z, P->Z); // Z = (X-Z)*coeff[2] - (X+Z)*coeff[1] + sike_fp2sqr_mont(t1, t1); // t1 = [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2 + sike_fp2sqr_mont(P->Z, P->Z); // Z = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2 + sike_fp2add(t1, t0, P->X); // X = coeff[0]*(X+Z)*(X-Z) + [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2 + sike_fp2sub(P->Z, t0, t0); // t0 = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2 - coeff[0]*(X+Z)*(X-Z) + sike_fp2mul_mont(P->X, t1, P->X); // Xfinal + sike_fp2mul_mont(P->Z, t0, P->Z); // Zfinal +} + + +void xTPL(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus) +{ // Tripling of a Montgomery point in projective coordinates (X:Z). + // Input: projective Montgomery x-coordinates P = (X:Z), where x=X/Z and Montgomery curve constants A24plus = A+2C and A24minus = A-2C. + // Output: projective Montgomery x-coordinates Q = 3*P = (X3:Z3). 
+ f2elm_t t0, t1, t2, t3, t4, t5, t6; + + sike_fp2sub(P->X, P->Z, t0); // t0 = X-Z + sike_fp2sqr_mont(t0, t2); // t2 = (X-Z)^2 + sike_fp2add(P->X, P->Z, t1); // t1 = X+Z + sike_fp2sqr_mont(t1, t3); // t3 = (X+Z)^2 + sike_fp2add(t0, t1, t4); // t4 = 2*X + sike_fp2sub(t1, t0, t0); // t0 = 2*Z + sike_fp2sqr_mont(t4, t1); // t1 = 4*X^2 + sike_fp2sub(t1, t3, t1); // t1 = 4*X^2 - (X+Z)^2 + sike_fp2sub(t1, t2, t1); // t1 = 4*X^2 - (X+Z)^2 - (X-Z)^2 + sike_fp2mul_mont(t3, A24plus, t5); // t5 = A24plus*(X+Z)^2 + sike_fp2mul_mont(t3, t5, t3); // t3 = A24plus*(X+Z)^3 + sike_fp2mul_mont(A24minus, t2, t6); // t6 = A24minus*(X-Z)^2 + sike_fp2mul_mont(t2, t6, t2); // t2 = A24minus*(X-Z)^3 + sike_fp2sub(t2, t3, t3); // t3 = A24minus*(X-Z)^3 - coeff*(X+Z)^3 + sike_fp2sub(t5, t6, t2); // t2 = A24plus*(X+Z)^2 - A24minus*(X-Z)^2 + sike_fp2mul_mont(t1, t2, t1); // t1 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] + sike_fp2add(t3, t1, t2); // t2 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] + A24minus*(X-Z)^3 - coeff*(X+Z)^3 + sike_fp2sqr_mont(t2, t2); // t2 = t2^2 + sike_fp2mul_mont(t4, t2, Q->X); // X3 = 2*X*t2 + sike_fp2sub(t3, t1, t1); // t1 = A24minus*(X-Z)^3 - A24plus*(X+Z)^3 - [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] + sike_fp2sqr_mont(t1, t1); // t1 = t1^2 + sike_fp2mul_mont(t0, t1, Q->Z); // Z3 = 2*Z*t1 +} + +void xTPLe(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus, size_t e) +{ // Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings. + // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constants A24plus = A+2C and A24minus = A-2C. + // Output: projective Montgomery x-coordinates Q <- (3^e)*P. 
+ memmove(Q, P, sizeof(*P)); + for (size_t i = 0; i < e; i++) { + xTPL(Q, Q, A24minus, A24plus); + } +} + +void get_3_isog(const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus, f2elm_t* coeff) +{ // Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3. + // Input: projective point of order three P = (X3:Z3). + // Output: the 3-isogenous Montgomery curve with projective coefficient A/C. + f2elm_t t0, t1, t2, t3, t4; + + sike_fp2sub(P->X, P->Z, coeff[0]); // coeff0 = X-Z + sike_fp2sqr_mont(coeff[0], t0); // t0 = (X-Z)^2 + sike_fp2add(P->X, P->Z, coeff[1]); // coeff1 = X+Z + sike_fp2sqr_mont(coeff[1], t1); // t1 = (X+Z)^2 + sike_fp2add(t0, t1, t2); // t2 = (X+Z)^2 + (X-Z)^2 + sike_fp2add(coeff[0], coeff[1], t3); // t3 = 2*X + sike_fp2sqr_mont(t3, t3); // t3 = 4*X^2 + sike_fp2sub(t3, t2, t3); // t3 = 4*X^2 - (X+Z)^2 - (X-Z)^2 + sike_fp2add(t1, t3, t2); // t2 = 4*X^2 - (X-Z)^2 + sike_fp2add(t3, t0, t3); // t3 = 4*X^2 - (X+Z)^2 + sike_fp2add(t0, t3, t4); // t4 = 4*X^2 - (X+Z)^2 + (X-Z)^2 + sike_fp2add(t4, t4, t4); // t4 = 2(4*X^2 - (X+Z)^2 + (X-Z)^2) + sike_fp2add(t1, t4, t4); // t4 = 8*X^2 - (X+Z)^2 + 2*(X-Z)^2 + sike_fp2mul_mont(t2, t4, A24minus); // A24minus = [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2] + sike_fp2add(t1, t2, t4); // t4 = 4*X^2 + (X+Z)^2 - (X-Z)^2 + sike_fp2add(t4, t4, t4); // t4 = 2(4*X^2 + (X+Z)^2 - (X-Z)^2) + sike_fp2add(t0, t4, t4); // t4 = 8*X^2 + 2*(X+Z)^2 - (X-Z)^2 + sike_fp2mul_mont(t3, t4, t4); // t4 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2] + sike_fp2sub(t4, A24minus, t0); // t0 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2] - [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2] + sike_fp2add(A24minus, t0, A24plus); // A24plus = 8*X^2 - (X+Z)^2 + 2*(X-Z)^2 +} + + +void eval_3_isog(point_proj_t Q, f2elm_t* coeff) +{ // Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3) of order 3 on a Montgomery curve and + // a point P with 2 coefficients in coeff (computed in the function 
get_3_isog()). + // Inputs: projective points P = (X3:Z3) and Q = (X:Z). + // Output: the projective point Q <- phi(Q) = (X3:Z3). + f2elm_t t0, t1, t2; + + sike_fp2add(Q->X, Q->Z, t0); // t0 = X+Z + sike_fp2sub(Q->X, Q->Z, t1); // t1 = X-Z + sike_fp2mul_mont(t0, coeff[0], t0); // t0 = coeff0*(X+Z) + sike_fp2mul_mont(t1, coeff[1], t1); // t1 = coeff1*(X-Z) + sike_fp2add(t0, t1, t2); // t2 = coeff0*(X+Z) + coeff1*(X-Z) + sike_fp2sub(t1, t0, t0); // t0 = coeff1*(X-Z) - coeff0*(X+Z) + sike_fp2sqr_mont(t2, t2); // t2 = [coeff0*(X+Z) + coeff1*(X-Z)]^2 + sike_fp2sqr_mont(t0, t0); // t0 = [coeff1*(X-Z) - coeff0*(X+Z)]^2 + sike_fp2mul_mont(Q->X, t2, Q->X); // X3final = X*[coeff0*(X+Z) + coeff1*(X-Z)]^2 + sike_fp2mul_mont(Q->Z, t0, Q->Z); // Z3final = Z*[coeff1*(X-Z) - coeff0*(X+Z)]^2 +} + + +void inv_3_way(f2elm_t z1, f2elm_t z2, f2elm_t z3) +{ // 3-way simultaneous inversion + // Input: z1,z2,z3 + // Output: 1/z1,1/z2,1/z3 (override inputs). + f2elm_t t0, t1, t2, t3; + + sike_fp2mul_mont(z1, z2, t0); // t0 = z1*z2 + sike_fp2mul_mont(z3, t0, t1); // t1 = z1*z2*z3 + sike_fp2inv_mont(t1); // t1 = 1/(z1*z2*z3) + sike_fp2mul_mont(z3, t1, t2); // t2 = 1/(z1*z2) + sike_fp2mul_mont(t2, z2, t3); // t3 = 1/z1 + sike_fp2mul_mont(t2, z1, z2); // z2 = 1/z2 + sike_fp2mul_mont(t0, t1, z3); // z3 = 1/z3 + sike_fp2copy(t3, z1); // z1 = 1/z1 +} + + +void get_A(const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A) +{ // Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A. + // Input: the x-coordinates xP, xQ, and xR of the points P, Q and R. + // Output: the coefficient A corresponding to the curve E_A: y^2=x^3+A*x^2+x. 
+ f2elm_t t0, t1, one = F2ELM_INIT; + + extern const struct params_t p503; + sike_fpcopy(p503.mont_one, one->c0); + sike_fp2add(xP, xQ, t1); // t1 = xP+xQ + sike_fp2mul_mont(xP, xQ, t0); // t0 = xP*xQ + sike_fp2mul_mont(xR, t1, A); // A = xR*t1 + sike_fp2add(t0, A, A); // A = A+t0 + sike_fp2mul_mont(t0, xR, t0); // t0 = t0*xR + sike_fp2sub(A, one, A); // A = A-1 + sike_fp2add(t0, t0, t0); // t0 = t0+t0 + sike_fp2add(t1, xR, t1); // t1 = t1+xR + sike_fp2add(t0, t0, t0); // t0 = t0+t0 + sike_fp2sqr_mont(A, A); // A = A^2 + sike_fp2inv_mont(t0); // t0 = 1/t0 + sike_fp2mul_mont(A, t0, A); // A = A*t0 + sike_fp2sub(A, t1, A); // Afinal = A-t1 +} + + +void j_inv(const f2elm_t A, const f2elm_t C, f2elm_t jinv) +{ // Computes the j-invariant of a Montgomery curve with projective constant. + // Input: A,C in GF(p^2). + // Output: j=256*(A^2-3*C^2)^3/(C^4*(A^2-4*C^2)), which is the j-invariant of the Montgomery curve B*y^2=x^3+(A/C)*x^2+x or (equivalently) j-invariant of B'*y^2=C*x^3+A*x^2+C*x. + f2elm_t t0, t1; + + sike_fp2sqr_mont(A, jinv); // jinv = A^2 + sike_fp2sqr_mont(C, t1); // t1 = C^2 + sike_fp2add(t1, t1, t0); // t0 = t1+t1 + sike_fp2sub(jinv, t0, t0); // t0 = jinv-t0 + sike_fp2sub(t0, t1, t0); // t0 = t0-t1 + sike_fp2sub(t0, t1, jinv); // jinv = t0-t1 + sike_fp2sqr_mont(t1, t1); // t1 = t1^2 + sike_fp2mul_mont(jinv, t1, jinv); // jinv = jinv*t1 + sike_fp2add(t0, t0, t0); // t0 = t0+t0 + sike_fp2add(t0, t0, t0); // t0 = t0+t0 + sike_fp2sqr_mont(t0, t1); // t1 = t0^2 + sike_fp2mul_mont(t0, t1, t0); // t0 = t0*t1 + sike_fp2add(t0, t0, t0); // t0 = t0+t0 + sike_fp2add(t0, t0, t0); // t0 = t0+t0 + sike_fp2inv_mont(jinv); // jinv = 1/jinv + sike_fp2mul_mont(jinv, t0, jinv); // jinv = t0*jinv +} + + +void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t xPQ, const f2elm_t A24) +{ // Simultaneous doubling and differential addition. 
+ // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4. + // Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that = x(Q+P)=XQP/ZQP. + f2elm_t t0, t1, t2; + + sike_fp2add(P->X, P->Z, t0); // t0 = XP+ZP + sike_fp2sub(P->X, P->Z, t1); // t1 = XP-ZP + sike_fp2sqr_mont(t0, P->X); // XP = (XP+ZP)^2 + sike_fp2sub(Q->X, Q->Z, t2); // t2 = XQ-ZQ + sike_fp2correction(t2); + sike_fp2add(Q->X, Q->Z, Q->X); // XQ = XQ+ZQ + sike_fp2mul_mont(t0, t2, t0); // t0 = (XP+ZP)*(XQ-ZQ) + sike_fp2sqr_mont(t1, P->Z); // ZP = (XP-ZP)^2 + sike_fp2mul_mont(t1, Q->X, t1); // t1 = (XP-ZP)*(XQ+ZQ) + sike_fp2sub(P->X, P->Z, t2); // t2 = (XP+ZP)^2-(XP-ZP)^2 + sike_fp2mul_mont(P->X, P->Z, P->X); // XP = (XP+ZP)^2*(XP-ZP)^2 + sike_fp2mul_mont(t2, A24, Q->X); // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2] + sike_fp2sub(t0, t1, Q->Z); // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ) + sike_fp2add(Q->X, P->Z, P->Z); // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2 + sike_fp2add(t0, t1, Q->X); // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ) + sike_fp2mul_mont(P->Z, t2, P->Z); // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2] + sike_fp2sqr_mont(Q->Z, Q->Z); // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 + sike_fp2sqr_mont(Q->X, Q->X); // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2 + sike_fp2mul_mont(Q->Z, xPQ, Q->Z); // ZQ = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 +} diff --git a/Sources/CNIOBoringSSL/third_party/sike/isogeny.h b/Sources/CNIOBoringSSL/third_party/sike/isogeny.h new file mode 100644 index 00000000..460c8c66 --- /dev/null +++ b/Sources/CNIOBoringSSL/third_party/sike/isogeny.h @@ -0,0 +1,49 @@ +#ifndef ISOGENY_H_ +#define ISOGENY_H_ + +// Computes [2^e](X:Z) on Montgomery curve with projective +// constant via e repeated doublings. 
+void xDBLe( + const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, + const f2elm_t C24, size_t e); +// Simultaneous doubling and differential addition. +void xDBLADD( + point_proj_t P, point_proj_t Q, const f2elm_t xPQ, + const f2elm_t A24); +// Tripling of a Montgomery point in projective coordinates (X:Z). +void xTPL( + const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, + const f2elm_t A24plus); +// Computes [3^e](X:Z) on Montgomery curve with projective constant +// via e repeated triplings. +void xTPLe( + const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, + const f2elm_t A24plus, size_t e); +// Given the x-coordinates of P, Q, and R, returns the value A +// corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A. +void get_A( + const f2elm_t xP, const f2elm_t xQ, const f2elm_t xR, f2elm_t A); +// Computes the j-invariant of a Montgomery curve with projective constant. +void j_inv( + const f2elm_t A, const f2elm_t C, f2elm_t jinv); +// Computes the corresponding 4-isogeny of a projective Montgomery +// point (X4:Z4) of order 4. +void get_4_isog( + const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t* coeff); +// Computes the corresponding 3-isogeny of a projective Montgomery +// point (X3:Z3) of order 3. +void get_3_isog( + const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus, + f2elm_t* coeff); +// Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3) +// of order 3 on a Montgomery curve and a point P with coefficients given in coeff. +void eval_3_isog( + point_proj_t Q, f2elm_t* coeff); +// Evaluates the isogeny at the point (X:Z) in the domain of the isogeny. 
+void eval_4_isog( + point_proj_t P, f2elm_t* coeff); +// 3-way simultaneous inversion +void inv_3_way( + f2elm_t z1, f2elm_t z2, f2elm_t z3); + +#endif // ISOGENY_H_ diff --git a/Sources/CNIOBoringSSL/third_party/sike/sike.c b/Sources/CNIOBoringSSL/third_party/sike/sike.c new file mode 100644 index 00000000..55ae50f6 --- /dev/null +++ b/Sources/CNIOBoringSSL/third_party/sike/sike.c @@ -0,0 +1,571 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: supersingular isogeny key encapsulation (SIKE) protocol +*********************************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" +#include "isogeny.h" +#include "fpx.h" + +extern const struct params_t p503; + +// Domain separation parameters for HMAC +static const uint8_t G[2] = {0,0}; +static const uint8_t H[2] = {1,0}; +static const uint8_t F[2] = {2,0}; + +// SIDHp503_JINV_BYTESZ is a number of bytes used for encoding j-invariant. +#define SIDHp503_JINV_BYTESZ 126U +// SIDHp503_PRV_A_BITSZ is a number of bits of SIDH private key (2-isogeny) +#define SIDHp503_PRV_A_BITSZ 250U +// SIDHp503_PRV_B_BITSZ is a number of bits of SIDH private key (3-isogeny) +#define SIDHp503_PRV_B_BITSZ 253U +// MAX_INT_POINTS_ALICE is a number of points used in 2-isogeny tree computation +#define MAX_INT_POINTS_ALICE 7U +// MAX_INT_POINTS_BOB is a number of points used in 3-isogeny tree computation +#define MAX_INT_POINTS_BOB 8U + +// Produces HMAC-SHA256 of data |S| mac'ed with the key |key|. Result is stored in |out| +// which must have size of at least |outsz| bytes and must be not bigger than +// SHA256_DIGEST_LENGTH. The output of a HMAC may be truncated. +// The |key| buffer is reused by the hmac_sum and hence, its size must be equal +// to SHA256_CBLOCK. 
The HMAC key provided in |key| buffer must be smaller or equal +// to SHA256_DIGEST_LENGTH. |key| can overlap |out|. +static void hmac_sum( + uint8_t *out, size_t outsz, const uint8_t S[2], uint8_t key[SHA256_CBLOCK]) { + for(size_t i=0; iX->c0[i] ^ Q->X->c0[i]); + P->X->c0[i] = temp ^ P->X->c0[i]; + Q->X->c0[i] = temp ^ Q->X->c0[i]; + temp = option & (P->Z->c0[i] ^ Q->Z->c0[i]); + P->Z->c0[i] = temp ^ P->Z->c0[i]; + Q->Z->c0[i] = temp ^ Q->Z->c0[i]; + temp = option & (P->X->c1[i] ^ Q->X->c1[i]); + P->X->c1[i] = temp ^ P->X->c1[i]; + Q->X->c1[i] = temp ^ Q->X->c1[i]; + temp = option & (P->Z->c1[i] ^ Q->Z->c1[i]); + P->Z->c1[i] = temp ^ P->Z->c1[i]; + Q->Z->c1[i] = temp ^ Q->Z->c1[i]; + } +} +#endif + +// Swap points. +// If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P +static inline void sike_fp2cswap(point_proj_t P, point_proj_t Q, const crypto_word_t option) +{ +#if defined(OPENSSL_X86_64) && !defined(OPENSSL_NO_ASM) + sike_cswap_asm(P, Q, option); +#else + sike_cswap(P, Q, option); +#endif +} + +static void LADDER3PT( + const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, const uint8_t* m, + int is_A, point_proj_t R, const f2elm_t A) { + point_proj_t R0 = POINT_PROJ_INIT, R2 = POINT_PROJ_INIT; + f2elm_t A24 = F2ELM_INIT; + crypto_word_t mask; + int bit, swap, prevbit = 0; + + const size_t nbits = is_A?SIDHp503_PRV_A_BITSZ:SIDHp503_PRV_B_BITSZ; + + // Initializing constant + sike_fpcopy(p503.mont_one, A24[0].c0); + sike_fp2add(A24, A24, A24); + sike_fp2add(A, A24, A24); + sike_fp2div2(A24, A24); + sike_fp2div2(A24, A24); // A24 = (A+2)/4 + + // Initializing points + sike_fp2copy(xQ, R0->X); + sike_fpcopy(p503.mont_one, R0->Z[0].c0); + sike_fp2copy(xPQ, R2->X); + sike_fpcopy(p503.mont_one, R2->Z[0].c0); + sike_fp2copy(xP, R->X); + sike_fpcopy(p503.mont_one, R->Z[0].c0); + memset(R->Z->c1, 0, sizeof(R->Z->c1)); + + // Main loop + for (size_t i = 0; i < nbits; i++) { + bit = (m[i >> 3] >> (i & 7)) & 1; + swap = bit ^ 
prevbit; + prevbit = bit; + mask = 0 - (crypto_word_t)swap; + + sike_fp2cswap(R, R2, mask); + xDBLADD(R0, R2, R->X, A24); + sike_fp2mul_mont(R2->X, R->Z, R2->X); + } +} + +// Initialization of basis points +static inline void sike_init_basis(const crypto_word_t *gen, f2elm_t XP, f2elm_t XQ, f2elm_t XR) { + sike_fpcopy(gen, XP->c0); + sike_fpcopy(gen + NWORDS_FIELD, XP->c1); + sike_fpcopy(gen + 2*NWORDS_FIELD, XQ->c0); + memset(XQ->c1, 0, sizeof(XQ->c1)); + sike_fpcopy(gen + 3*NWORDS_FIELD, XR->c0); + sike_fpcopy(gen + 4*NWORDS_FIELD, XR->c1); +} + +// Conversion of GF(p^2) element from Montgomery to standard representation. +static inline void sike_fp2_encode(const f2elm_t x, uint8_t *enc) { + f2elm_t t; + sike_from_fp2mont(x, t); + + // convert to bytes in little endian form + for (size_t i=0; i> (8*(i%LSZ))) & 0xFF; + enc[i+FIELD_BYTESZ] = (t[0].c1[i/LSZ] >> (8*(i%LSZ))) & 0xFF; + } +} + +// Parse byte sequence back into GF(p^2) element, and conversion to Montgomery representation. +// Elements over GF(p503) are encoded in 63 octets in little endian format +// (i.e., the least significant octet is located in the lowest memory address). +static inline void fp2_decode(const uint8_t *enc, f2elm_t t) { + memset(t[0].c0, 0, sizeof(t[0].c0)); + memset(t[0].c1, 0, sizeof(t[0].c1)); + // convert bytes in little endian form to f2elm_t + for (size_t i = 0; i < FIELD_BYTESZ; i++) { + t[0].c0[i/LSZ] |= ((crypto_word_t)enc[i+ 0]) << (8*(i%LSZ)); + t[0].c1[i/LSZ] |= ((crypto_word_t)enc[i+FIELD_BYTESZ]) << (8*(i%LSZ)); + } + sike_to_fp2mont(t, t); +} + +// Alice's ephemeral public key generation +// Input: a private key prA in the range [0, 2^250 - 1], stored in 32 bytes. +// Output: the public key pkA consisting of 3 GF(p503^2) elements encoded in 378 bytes. 
+static void gen_iso_A(const uint8_t* skA, uint8_t* pkA) +{ + point_proj_t R, pts[MAX_INT_POINTS_ALICE]; + point_proj_t phiP = POINT_PROJ_INIT; + point_proj_t phiQ = POINT_PROJ_INIT; + point_proj_t phiR = POINT_PROJ_INIT; + f2elm_t XPA, XQA, XRA, coeff[3]; + f2elm_t A24plus = F2ELM_INIT; + f2elm_t C24 = F2ELM_INIT; + f2elm_t A = F2ELM_INIT; + unsigned int m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0; + + // Initialize basis points + sike_init_basis(p503.A_gen, XPA, XQA, XRA); + sike_init_basis(p503.B_gen, phiP->X, phiQ->X, phiR->X); + sike_fpcopy(p503.mont_one, (phiP->Z)->c0); + sike_fpcopy(p503.mont_one, (phiQ->Z)->c0); + sike_fpcopy(p503.mont_one, (phiR->Z)->c0); + + // Initialize constants + sike_fpcopy(p503.mont_one, A24plus->c0); + sike_fp2add(A24plus, A24plus, C24); + + // Retrieve kernel point + LADDER3PT(XPA, XQA, XRA, skA, 1, R, A); + + // Traverse tree + index = 0; + for (size_t row = 1; row < A_max; row++) { + while (index < A_max-row) { + sike_fp2copy(R->X, pts[npts]->X); + sike_fp2copy(R->Z, pts[npts]->Z); + pts_index[npts++] = index; + m = p503.A_strat[ii++]; + xDBLe(R, R, A24plus, C24, (2*m)); + index += m; + } + get_4_isog(R, A24plus, C24, coeff); + + for (size_t i = 0; i < npts; i++) { + eval_4_isog(pts[i], coeff); + } + eval_4_isog(phiP, coeff); + eval_4_isog(phiQ, coeff); + eval_4_isog(phiR, coeff); + + sike_fp2copy(pts[npts-1]->X, R->X); + sike_fp2copy(pts[npts-1]->Z, R->Z); + index = pts_index[npts-1]; + npts -= 1; + } + + get_4_isog(R, A24plus, C24, coeff); + eval_4_isog(phiP, coeff); + eval_4_isog(phiQ, coeff); + eval_4_isog(phiR, coeff); + + inv_3_way(phiP->Z, phiQ->Z, phiR->Z); + sike_fp2mul_mont(phiP->X, phiP->Z, phiP->X); + sike_fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X); + sike_fp2mul_mont(phiR->X, phiR->Z, phiR->X); + + // Format public key + sike_fp2_encode(phiP->X, pkA); + sike_fp2_encode(phiQ->X, pkA + SIDHp503_JINV_BYTESZ); + sike_fp2_encode(phiR->X, pkA + 2*SIDHp503_JINV_BYTESZ); +} + +// Bob's ephemeral key-pair 
generation +// It produces a private key skB and computes the public key pkB. +// The private key is an integer in the range [0, 2^Floor(Log(2,3^159)) - 1], stored in 32 bytes. +// The public key consists of 3 GF(p503^2) elements encoded in 378 bytes. +static void gen_iso_B(const uint8_t* skB, uint8_t* pkB) +{ + point_proj_t R, pts[MAX_INT_POINTS_BOB]; + point_proj_t phiP = POINT_PROJ_INIT; + point_proj_t phiQ = POINT_PROJ_INIT; + point_proj_t phiR = POINT_PROJ_INIT; + f2elm_t XPB, XQB, XRB, coeff[3]; + f2elm_t A24plus = F2ELM_INIT; + f2elm_t A24minus = F2ELM_INIT; + f2elm_t A = F2ELM_INIT; + unsigned int m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0; + + // Initialize basis points + sike_init_basis(p503.B_gen, XPB, XQB, XRB); + sike_init_basis(p503.A_gen, phiP->X, phiQ->X, phiR->X); + sike_fpcopy(p503.mont_one, (phiP->Z)->c0); + sike_fpcopy(p503.mont_one, (phiQ->Z)->c0); + sike_fpcopy(p503.mont_one, (phiR->Z)->c0); + + // Initialize constants + sike_fpcopy(p503.mont_one, A24plus->c0); + sike_fp2add(A24plus, A24plus, A24plus); + sike_fp2copy(A24plus, A24minus); + sike_fp2neg(A24minus); + + // Retrieve kernel point + LADDER3PT(XPB, XQB, XRB, skB, 0, R, A); + + // Traverse tree + index = 0; + for (size_t row = 1; row < B_max; row++) { + while (index < B_max-row) { + sike_fp2copy(R->X, pts[npts]->X); + sike_fp2copy(R->Z, pts[npts]->Z); + pts_index[npts++] = index; + m = p503.B_strat[ii++]; + xTPLe(R, R, A24minus, A24plus, m); + index += m; + } + get_3_isog(R, A24minus, A24plus, coeff); + + for (size_t i = 0; i < npts; i++) { + eval_3_isog(pts[i], coeff); + } + eval_3_isog(phiP, coeff); + eval_3_isog(phiQ, coeff); + eval_3_isog(phiR, coeff); + + sike_fp2copy(pts[npts-1]->X, R->X); + sike_fp2copy(pts[npts-1]->Z, R->Z); + index = pts_index[npts-1]; + npts -= 1; + } + + get_3_isog(R, A24minus, A24plus, coeff); + eval_3_isog(phiP, coeff); + eval_3_isog(phiQ, coeff); + eval_3_isog(phiR, coeff); + + inv_3_way(phiP->Z, phiQ->Z, phiR->Z); + 
sike_fp2mul_mont(phiP->X, phiP->Z, phiP->X); + sike_fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X); + sike_fp2mul_mont(phiR->X, phiR->Z, phiR->X); + + // Format public key + sike_fp2_encode(phiP->X, pkB); + sike_fp2_encode(phiQ->X, pkB + SIDHp503_JINV_BYTESZ); + sike_fp2_encode(phiR->X, pkB + 2*SIDHp503_JINV_BYTESZ); +} + +// Alice's ephemeral shared secret computation +// It produces a shared secret key ssA using her secret key skA and Bob's public key pkB +// Inputs: Alice's skA is an integer in the range [0, 2^250 - 1], stored in 32 bytes. +// Bob's pkB consists of 3 GF(p503^2) elements encoded in 378 bytes. +// Output: a shared secret ssA that consists of one element in GF(p503^2) encoded in 126 bytes. +static void ex_iso_A(const uint8_t* skA, const uint8_t* pkB, uint8_t* ssA) +{ + point_proj_t R, pts[MAX_INT_POINTS_ALICE]; + f2elm_t coeff[3], PKB[3], jinv; + f2elm_t A24plus = F2ELM_INIT; + f2elm_t C24 = F2ELM_INIT; + f2elm_t A = F2ELM_INIT; + unsigned int m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0; + + // Initialize images of Bob's basis + fp2_decode(pkB, PKB[0]); + fp2_decode(pkB + SIDHp503_JINV_BYTESZ, PKB[1]); + fp2_decode(pkB + 2*SIDHp503_JINV_BYTESZ, PKB[2]); + + // Initialize constants + get_A(PKB[0], PKB[1], PKB[2], A); // TODO: Can return projective A? 
+ sike_fpadd(p503.mont_one, p503.mont_one, C24->c0); + sike_fp2add(A, C24, A24plus); + sike_fpadd(C24->c0, C24->c0, C24->c0); + + // Retrieve kernel point + LADDER3PT(PKB[0], PKB[1], PKB[2], skA, 1, R, A); + + // Traverse tree + index = 0; + for (size_t row = 1; row < A_max; row++) { + while (index < A_max-row) { + sike_fp2copy(R->X, pts[npts]->X); + sike_fp2copy(R->Z, pts[npts]->Z); + pts_index[npts++] = index; + m = p503.A_strat[ii++]; + xDBLe(R, R, A24plus, C24, (2*m)); + index += m; + } + get_4_isog(R, A24plus, C24, coeff); + + for (size_t i = 0; i < npts; i++) { + eval_4_isog(pts[i], coeff); + } + + sike_fp2copy(pts[npts-1]->X, R->X); + sike_fp2copy(pts[npts-1]->Z, R->Z); + index = pts_index[npts-1]; + npts -= 1; + } + + get_4_isog(R, A24plus, C24, coeff); + sike_fp2div2(C24, C24); + sike_fp2sub(A24plus, C24, A24plus); + sike_fp2div2(C24, C24); + j_inv(A24plus, C24, jinv); + sike_fp2_encode(jinv, ssA); +} + +// Bob's ephemeral shared secret computation +// It produces a shared secret key ssB using his secret key skB and Alice's public key pkA +// Inputs: Bob's skB is an integer in the range [0, 2^Floor(Log(2,3^159)) - 1], stored in 32 bytes. +// Alice's pkA consists of 3 GF(p503^2) elements encoded in 378 bytes. +// Output: a shared secret ssB that consists of one element in GF(p503^2) encoded in 126 bytes. 
+static void ex_iso_B(const uint8_t* skB, const uint8_t* pkA, uint8_t* ssB) +{ + point_proj_t R, pts[MAX_INT_POINTS_BOB]; + f2elm_t coeff[3], PKB[3], jinv; + f2elm_t A24plus = F2ELM_INIT; + f2elm_t A24minus = F2ELM_INIT; + f2elm_t A = F2ELM_INIT; + unsigned int m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0; + + // Initialize images of Alice's basis + fp2_decode(pkA, PKB[0]); + fp2_decode(pkA + SIDHp503_JINV_BYTESZ, PKB[1]); + fp2_decode(pkA + 2*SIDHp503_JINV_BYTESZ, PKB[2]); + + // Initialize constants + get_A(PKB[0], PKB[1], PKB[2], A); + sike_fpadd(p503.mont_one, p503.mont_one, A24minus->c0); + sike_fp2add(A, A24minus, A24plus); + sike_fp2sub(A, A24minus, A24minus); + + // Retrieve kernel point + LADDER3PT(PKB[0], PKB[1], PKB[2], skB, 0, R, A); + + // Traverse tree + index = 0; + for (size_t row = 1; row < B_max; row++) { + while (index < B_max-row) { + sike_fp2copy(R->X, pts[npts]->X); + sike_fp2copy(R->Z, pts[npts]->Z); + pts_index[npts++] = index; + m = p503.B_strat[ii++]; + xTPLe(R, R, A24minus, A24plus, m); + index += m; + } + get_3_isog(R, A24minus, A24plus, coeff); + + for (size_t i = 0; i < npts; i++) { + eval_3_isog(pts[i], coeff); + } + + sike_fp2copy(pts[npts-1]->X, R->X); + sike_fp2copy(pts[npts-1]->Z, R->Z); + index = pts_index[npts-1]; + npts -= 1; + } + + get_3_isog(R, A24minus, A24plus, coeff); + sike_fp2add(A24plus, A24minus, A); + sike_fp2add(A, A, A); + sike_fp2sub(A24plus, A24minus, A24plus); + j_inv(A, A24plus, jinv); + sike_fp2_encode(jinv, ssB); +} + +int SIKE_keypair(uint8_t out_priv[SIKEp503_PRV_BYTESZ], + uint8_t out_pub[SIKEp503_PUB_BYTESZ]) { + int ret = 0; + + // Calculate private key for Alice. 
Needs to be in range [0, 2^0xFA - 1] and < + // 253 bits + BIGNUM *bn_sidh_prv = BN_new(); + if (!bn_sidh_prv || + !BN_rand(bn_sidh_prv, SIDHp503_PRV_B_BITSZ, BN_RAND_TOP_ONE, + BN_RAND_BOTTOM_ANY) || + !BN_bn2le_padded(out_priv, BITS_TO_BYTES(SIDHp503_PRV_B_BITSZ), + bn_sidh_prv)) { + goto end; + } + + gen_iso_B(out_priv, out_pub); + ret = 1; + +end: + BN_free(bn_sidh_prv); + return ret; +} + +void SIKE_encaps(uint8_t out_shared_key[SIKEp503_SS_BYTESZ], + uint8_t out_ciphertext[SIKEp503_CT_BYTESZ], + const uint8_t pub_key[SIKEp503_PUB_BYTESZ]) { + // Secret buffer is reused by the function to store some ephemeral + // secret data. It's size must be maximum of SHA256_CBLOCK, + // SIKEp503_MSG_BYTESZ and SIDHp503_PRV_A_BITSZ in bytes. + uint8_t secret[SHA256_CBLOCK]; + uint8_t j[SIDHp503_JINV_BYTESZ]; + uint8_t temp[SIKEp503_MSG_BYTESZ + SIKEp503_CT_BYTESZ]; + SHA256_CTX ctx; + + // Generate secret key for A + // secret key A = HMAC({0,1}^n || pub_key), G) mod SIDHp503_PRV_A_BITSZ + RAND_bytes(temp, SIKEp503_MSG_BYTESZ); + + SHA256_Init(&ctx); + SHA256_Update(&ctx, temp, SIKEp503_MSG_BYTESZ); + SHA256_Update(&ctx, pub_key, SIKEp503_PUB_BYTESZ); + SHA256_Final(secret, &ctx); + hmac_sum(secret, BITS_TO_BYTES(SIDHp503_PRV_A_BITSZ), G, secret); + secret[BITS_TO_BYTES(SIDHp503_PRV_A_BITSZ) - 1] &= + (1 << (SIDHp503_PRV_A_BITSZ % 8)) - 1; + + // Generate public key for A - first part of the ciphertext + gen_iso_A(secret, out_ciphertext); + + // Generate c1: + // h = HMAC(j-invariant(secret key A, public key B), F) + // c1 = h ^ m + ex_iso_A(secret, pub_key, j); + SHA256_Init(&ctx); + SHA256_Update(&ctx, j, sizeof(j)); + SHA256_Final(secret, &ctx); + hmac_sum(secret, SIKEp503_MSG_BYTESZ, F, secret); + + // c1 = h ^ m + uint8_t *c1 = &out_ciphertext[SIKEp503_PUB_BYTESZ]; + for (size_t i = 0; i < SIKEp503_MSG_BYTESZ; i++) { + c1[i] = temp[i] ^ secret[i]; + } + + SHA256_Init(&ctx); + SHA256_Update(&ctx, temp, SIKEp503_MSG_BYTESZ); + SHA256_Update(&ctx, out_ciphertext, 
SIKEp503_CT_BYTESZ); + SHA256_Final(secret, &ctx); + // Generate shared secret out_shared_key = HMAC(m||out_ciphertext, F) + hmac_sum(out_shared_key, SIKEp503_SS_BYTESZ, H, secret); +} + +void SIKE_decaps(uint8_t out_shared_key[SIKEp503_SS_BYTESZ], + const uint8_t ciphertext[SIKEp503_CT_BYTESZ], + const uint8_t pub_key[SIKEp503_PUB_BYTESZ], + const uint8_t priv_key[SIKEp503_PRV_BYTESZ]) { + // Secret buffer is reused by the function to store some ephemeral + // secret data. It's size must be maximum of SHA256_CBLOCK, + // SIKEp503_MSG_BYTESZ and SIDHp503_PRV_A_BITSZ in bytes. + uint8_t secret[SHA256_CBLOCK]; + uint8_t j[SIDHp503_JINV_BYTESZ]; + uint8_t c0[SIKEp503_PUB_BYTESZ]; + uint8_t temp[SIKEp503_MSG_BYTESZ]; + uint8_t shared_nok[SIKEp503_MSG_BYTESZ]; + SHA256_CTX ctx; + + RAND_bytes(shared_nok, SIKEp503_MSG_BYTESZ); + + // Recover m + // Let ciphertext = c0 || c1 - both have fixed sizes + // m = F(j-invariant(c0, priv_key)) ^ c1 + ex_iso_B(priv_key, ciphertext, j); + + SHA256_Init(&ctx); + SHA256_Update(&ctx, j, sizeof(j)); + SHA256_Final(secret, &ctx); + hmac_sum(secret, SIKEp503_MSG_BYTESZ, F, secret); + + const uint8_t *c1 = &ciphertext[sizeof(c0)]; + for (size_t i = 0; i < SIKEp503_MSG_BYTESZ; i++) { + temp[i] = c1[i] ^ secret[i]; + } + + SHA256_Init(&ctx); + SHA256_Update(&ctx, temp, SIKEp503_MSG_BYTESZ); + SHA256_Update(&ctx, pub_key, SIKEp503_PUB_BYTESZ); + SHA256_Final(secret, &ctx); + hmac_sum(secret, BITS_TO_BYTES(SIDHp503_PRV_A_BITSZ), G, secret); + + // Recover secret key A = G(m||pub_key) mod + secret[BITS_TO_BYTES(SIDHp503_PRV_A_BITSZ) - 1] &= + (1 << (SIDHp503_PRV_A_BITSZ % 8)) - 1; + + // Recover c0 = public key A + gen_iso_A(secret, c0); + crypto_word_t ok = constant_time_is_zero_w( + CRYPTO_memcmp(c0, ciphertext, SIKEp503_PUB_BYTESZ)); + for (size_t i = 0; i < SIKEp503_MSG_BYTESZ; i++) { + temp[i] = constant_time_select_8(ok, temp[i], shared_nok[i]); + } + + SHA256_Init(&ctx); + SHA256_Update(&ctx, temp, SIKEp503_MSG_BYTESZ); + 
SHA256_Update(&ctx, ciphertext, SIKEp503_CT_BYTESZ); + SHA256_Final(secret, &ctx); + hmac_sum(out_shared_key, SIKEp503_SS_BYTESZ, H, secret); +} diff --git a/Sources/CNIOBoringSSL/third_party/sike/sike.h b/Sources/CNIOBoringSSL/third_party/sike/sike.h new file mode 100644 index 00000000..a037aba6 --- /dev/null +++ b/Sources/CNIOBoringSSL/third_party/sike/sike.h @@ -0,0 +1,64 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: API header file for SIKE +*********************************************************************************************/ + +#ifndef SIKE_H_ +#define SIKE_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* SIKEp503 + * + * SIKE is a isogeny based post-quantum key encapsulation mechanism. Description of the + * algorithm is provided in [SIKE]. This implementation uses 503-bit field size. The code + * is based on "Additional_Implementations" from PQC NIST submission package which can + * be found here: + * https://csrc.nist.gov/CSRC/media/Projects/Post-Quantum-Cryptography/documents/round-1/submissions/SIKE.zip + * + * [SIKE] https://sike.org/files/SIDH-spec.pdf + */ + +// SIKEp503_PUB_BYTESZ is the number of bytes in a public key. +#define SIKEp503_PUB_BYTESZ 378 +// SIKEp503_PRV_BYTESZ is the number of bytes in a private key. +#define SIKEp503_PRV_BYTESZ 32 +// SIKEp503_SS_BYTESZ is the number of bytes in a shared key. +#define SIKEp503_SS_BYTESZ 16 +// SIKEp503_MSG_BYTESZ is the number of bytes in a random bit string concatenated +// with the public key (see 1.4 of SIKE). +#define SIKEp503_MSG_BYTESZ 24 +// SIKEp503_SS_BYTESZ is the number of bytes in a ciphertext. +#define SIKEp503_CT_BYTESZ (SIKEp503_PUB_BYTESZ + SIKEp503_MSG_BYTESZ) + +// SIKE_keypair outputs a public and secret key. Internally it uses BN_rand() as +// an entropy source. In case of success function returns 1, otherwise 0. 
+OPENSSL_EXPORT int SIKE_keypair( + uint8_t out_priv[SIKEp503_PRV_BYTESZ], + uint8_t out_pub[SIKEp503_PUB_BYTESZ]); + +// SIKE_encaps generates and encrypts a random session key, writing those values to +// |out_shared_key| and |out_ciphertext|, respectively. +OPENSSL_EXPORT void SIKE_encaps( + uint8_t out_shared_key[SIKEp503_SS_BYTESZ], + uint8_t out_ciphertext[SIKEp503_CT_BYTESZ], + const uint8_t pub_key[SIKEp503_PUB_BYTESZ]); + +// SIKE_decaps outputs a random session key, writing it to |out_shared_key|. +OPENSSL_EXPORT void SIKE_decaps( + uint8_t out_shared_key[SIKEp503_SS_BYTESZ], + const uint8_t ciphertext[SIKEp503_CT_BYTESZ], + const uint8_t pub_key[SIKEp503_PUB_BYTESZ], + const uint8_t priv_key[SIKEp503_PRV_BYTESZ]); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/Sources/CNIOBoringSSL/third_party/sike/utils.h b/Sources/CNIOBoringSSL/third_party/sike/utils.h new file mode 100644 index 00000000..236fe3c4 --- /dev/null +++ b/Sources/CNIOBoringSSL/third_party/sike/utils.h @@ -0,0 +1,143 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: internal header file for P503 +*********************************************************************************************/ + +#ifndef UTILS_H_ +#define UTILS_H_ + +#include + +#include "../crypto/internal.h" +#include "sike.h" + +// Conversion macro from number of bits to number of bytes +#define BITS_TO_BYTES(nbits) (((nbits)+7)/8) + +// Bit size of the field +#define BITS_FIELD 503 +// Byte size of the field +#define FIELD_BYTESZ BITS_TO_BYTES(BITS_FIELD) +// Number of 64-bit words of a 256-bit element +#define NBITS_ORDER 256 +#define NWORDS64_ORDER ((NBITS_ORDER+63)/64) +// Number of elements in Alice's strategy +#define A_max 125 +// Number of elements in Bob's strategy +#define B_max 159 +// Word size size +#define RADIX sizeof(crypto_word_t)*8 +// Byte size of a limb +#define LSZ 
sizeof(crypto_word_t) + +#if defined(OPENSSL_64_BIT) + // Number of words of a 503-bit field element + #define NWORDS_FIELD 8 + // Number of "0" digits in the least significant part of p503 + 1 + #define p503_ZERO_WORDS 3 + // U64_TO_WORDS expands |x| for a |crypto_word_t| array literal. + #define U64_TO_WORDS(x) UINT64_C(x) +#else + // Number of words of a 503-bit field element + #define NWORDS_FIELD 16 + // Number of "0" digits in the least significant part of p503 + 1 + #define p503_ZERO_WORDS 7 + // U64_TO_WORDS expands |x| for a |crypto_word_t| array literal. + #define U64_TO_WORDS(x) \ + (uint32_t)(UINT64_C(x) & 0xffffffff), (uint32_t)(UINT64_C(x) >> 32) +#endif + +// Extended datatype support +#if !defined(BORINGSSL_HAS_UINT128) + typedef uint64_t uint128_t[2]; +#endif + +// The following functions return 1 (TRUE) if condition is true, 0 (FALSE) otherwise +// Digit multiplication +#define MUL(multiplier, multiplicand, hi, lo) digit_x_digit((multiplier), (multiplicand), &(lo)); + +// If mask |x|==0xff.ff set |x| to 1, otherwise 0 +#define M2B(x) ((x)>>(RADIX-1)) + +// Digit addition with carry +#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \ +do { \ + crypto_word_t tempReg = (addend1) + (crypto_word_t)(carryIn); \ + (sumOut) = (addend2) + tempReg; \ + (carryOut) = M2B(constant_time_lt_w(tempReg, (crypto_word_t)(carryIn)) | \ + constant_time_lt_w((sumOut), tempReg)); \ +} while(0) + +// Digit subtraction with borrow +#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \ +do { \ + crypto_word_t tempReg = (minuend) - (subtrahend); \ + crypto_word_t borrowReg = M2B(constant_time_lt_w((minuend), (subtrahend))); \ + borrowReg |= ((borrowIn) & constant_time_is_zero_w(tempReg)); \ + (differenceOut) = tempReg - (crypto_word_t)(borrowIn); \ + (borrowOut) = borrowReg; \ +} while(0) + +/* Old GCC 4.9 (jessie) doesn't implement {0} initialization properly, + which violates C11 as described in 6.7.9, 21 (similarily C99, 6.7.8). 
+ Defines below are used to work around the bug, and provide a way + to initialize f2elem_t and point_proj_t structs. + Bug has been fixed in GCC6 (debian stretch). +*/ +#define F2ELM_INIT {{ {0}, {0} }} +#define POINT_PROJ_INIT {{ F2ELM_INIT, F2ELM_INIT }} + +// Datatype for representing 503-bit field elements (512-bit max.) +// Elements over GF(p503) are encoded in 63 octets in little endian format +// (i.e., the least significant octet is located in the lowest memory address). +typedef crypto_word_t felm_t[NWORDS_FIELD]; + +// An element in F_{p^2}, is composed of two coefficients from F_p, * i.e. +// Fp2 element = c0 + c1*i in F_{p^2} +// Datatype for representing double-precision 2x503-bit field elements (512-bit max.) +// Elements (a+b*i) over GF(p503^2), where a and b are defined over GF(p503), are +// encoded as {a, b}, with a in the lowest memory portion. +typedef struct { + felm_t c0; + felm_t c1; +} fp2; + +// Our F_{p^2} element type is a pointer to the struct. +typedef fp2 f2elm_t[1]; + +// Datatype for representing double-precision 2x503-bit +// field elements in contiguous memory. +typedef crypto_word_t dfelm_t[2*NWORDS_FIELD]; + +// Constants used during SIKEp503 computation. 
+struct params_t { + // Stores P503 prime + const crypto_word_t prime[NWORDS_FIELD]; + // Stores P503 + 1 + const crypto_word_t prime_p1[NWORDS_FIELD]; + // Stores P503 * 2 + const crypto_word_t prime_x2[NWORDS_FIELD]; + // Alice's generator values {XPA0 + XPA1*i, XQA0, XRA0 + XRA1*i} + // in GF(p503^2), expressed in Montgomery representation + const crypto_word_t A_gen[5*NWORDS_FIELD]; + // Bob's generator values {XPB0 + XPB1*i, XQB0, XRB0 + XRB1*i} + // in GF(p503^2), expressed in Montgomery representation + const crypto_word_t B_gen[5*NWORDS_FIELD]; + // Montgomery constant mont_R2 = (2^512)^2 mod p503 + const crypto_word_t mont_R2[NWORDS_FIELD]; + // Value 'one' in Montgomery representation + const crypto_word_t mont_one[NWORDS_FIELD]; + // Fixed parameters for isogeny tree computation + const unsigned int A_strat[A_max-1]; + const unsigned int B_strat[B_max-1]; +}; + +// Point representation in projective XZ Montgomery coordinates. +typedef struct { + f2elm_t X; + f2elm_t Z; +} point_proj; +typedef point_proj point_proj_t[1]; + +#endif // UTILS_H_ diff --git a/scripts/build-asm.py b/scripts/build-asm.py index 543a36e1..3f358558 100755 --- a/scripts/build-asm.py +++ b/scripts/build-asm.py @@ -36,8 +36,11 @@ # perlasm system. 
NON_PERL_FILES = { ('linux', 'arm'): [ - 'crypto/curve25519/asm/x25519-asm-arm.S', - 'crypto/poly1305/poly1305_arm_asm.S', + 'boringssl/crypto/curve25519/asm/x25519-asm-arm.S', + 'boringssl/crypto/poly1305/poly1305_arm_asm.S', + ], + ('linux', 'x86_64'): [ + 'boringssl/crypto/hrss/asm/poly_rq_mul.S', ], } @@ -82,7 +85,7 @@ def ReadPerlAsmOperations(): """Returns a list of all perlasm() directives found in CMake config files in src/.""" perlasms = [] - cmakefiles = FindCMakeFiles('crypto') + cmakefiles = FindCMakeFiles('boringssl') for cmakefile in cmakefiles: perlasms.extend(ExtractPerlAsmFromCMakeFile(cmakefile)) @@ -133,9 +136,9 @@ def WriteAsmFiles(perlasms): for perlasm in perlasms: filename = os.path.basename(perlasm['input']) output = perlasm['output'] - if not output.startswith('crypto'): + if not output.startswith('boringssl/crypto'): raise ValueError('output missing crypto: %s' % output) - output = os.path.join(outDir, output[7:]) + output = os.path.join(outDir, output[17:]) if output.endswith('-armx.${ASM_EXT}'): output = output.replace('-armx', '-armx64' if arch == 'aarch64' else '-armx32') @@ -169,7 +172,7 @@ def preprocessor_platform_for_os(osname): def asm_target(osname, arch, asm): components = asm.split('/') - new_components = ["crypto"] + components[1:-1] + [components[-1].replace('.S', '.' + osname + '.' + arch + '.S')] + new_components = ["boringssl/crypto"] + components[1:-1] + [components[-1].replace('.S', '.' + osname + '.' + arch + '.S')] return '/'.join(new_components) diff --git a/scripts/vendor-boringssl.sh b/scripts/vendor-boringssl.sh index bb6b5264..a0c8898b 100755 --- a/scripts/vendor-boringssl.sh +++ b/scripts/vendor-boringssl.sh @@ -108,6 +108,8 @@ echo "OBTAINING submodules" echo "GENERATING assembly helpers" ( cd "$SRCROOT" + cd .. 
+ mkdir -p "${SRCROOT}/crypto/third_party/sike/asm" python "${HERE}/scripts/build-asm.py" ) @@ -124,8 +126,12 @@ PATTERNS=( 'crypto/*/*/*.c' 'crypto/*/*/*.S' 'crypto/*/*/*/*.c' +'crypto/*/*/*/*.S' 'third_party/fiat/*.h' 'third_party/fiat/*.c' +'third_party/sike/*.h' +'third_party/sike/*.c' +'third_party/sike/asm/*.c' ) EXCLUDES=(