Diffstat (limited to 'Backport-SM4-optimization-for-ARM-by-HW-instruction.patch')
-rw-r--r-- | Backport-SM4-optimization-for-ARM-by-HW-instruction.patch | 1228
1 file changed, 1228 insertions, 0 deletions
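The patch below backports OpenSSL's AArch64 SM4 acceleration (upstream PR 17455). Its central pattern is runtime dispatch: key setup tests the ARMV8_SM4 capability bit once and records either the sm4_v8_* assembly entry points or the portable C routines in function pointers, so the per-block path never re-checks the CPU. A minimal sketch of that shape, using the patch's HWSM4_CAPABLE/ARMV8_SM4/sm4_v8_encrypt names; the demo_pick_block helper and the void-pointer key type are hypothetical simplifications:

/*
 * Sketch of the dispatch the patch adds in sm4_init_key()
 * (crypto/evp/e_sm4.c) and cipher_hw_sm4_initkey(). HWSM4_CAPABLE
 * expands to a test of an OPENSSL_armcap_P bit; demo_pick_block and
 * the void * key type are hypothetical simplifications.
 */
typedef void (*block128_f)(const unsigned char in[16],
                           unsigned char out[16], const void *key);

extern unsigned int OPENSSL_armcap_P;
#define ARMV8_SM4     (1 << 10)                /* bit the patch defines */
#define HWSM4_CAPABLE (OPENSSL_armcap_P & ARMV8_SM4)

void sm4_v8_encrypt(const unsigned char *in, unsigned char *out,
                    const void *key);          /* from sm4-armv8.pl */
void ossl_sm4_encrypt(const unsigned char *in, unsigned char *out,
                      const void *key);        /* portable C fallback */

static block128_f demo_pick_block(void)
{
    /* Decided once at key setup; per-block calls stay branch-free. */
    return HWSM4_CAPABLE ? (block128_f)sm4_v8_encrypt
                         : (block128_f)ossl_sm4_encrypt;
}

The same once-at-init decision also fills the stream.cbc/stream.ecb/stream.ctr pointers, so whole-buffer assembly loops are used for the modes where the patch provides them.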
diff --git a/Backport-SM4-optimization-for-ARM-by-HW-instruction.patch b/Backport-SM4-optimization-for-ARM-by-HW-instruction.patch new file mode 100644 index 0000000..c68f1a0 --- /dev/null +++ b/Backport-SM4-optimization-for-ARM-by-HW-instruction.patch @@ -0,0 +1,1228 @@ +From 1cd480c10b8bbaa6f72d503494ff2973672ec0e4 Mon Sep 17 00:00:00 2001 +From: Daniel Hu <Daniel.Hu@arm.com> +Date: Tue, 19 Oct 2021 22:49:05 +0100 +Subject: [PATCH 05/13] SM4 optimization for ARM by HW instruction + +This patch implements the SM4 optimization for ARM processor, +using SM4 HW instruction, which is an optional feature of +crypto extension for aarch64 V8. + +Tested on some modern ARM micro-architectures with SM4 support, the +performance uplift can be observed around 8X~40X over existing +C implementation in openssl. Algorithms that can be parallelized +(like CTR, ECB, CBC decryption) are on higher end, with algorithm +like CBC encryption on lower end (due to inter-block dependency) + +Perf data on Yitian-710 2.75GHz hardware, before and after optimization: + +Before: + type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes 16384 bytes + SM4-CTR 105787.80k 107837.87k 108380.84k 108462.08k 108549.46k 108554.92k + SM4-ECB 111924.58k 118173.76k 119776.00k 120093.70k 120264.02k 120274.94k + SM4-CBC 106428.09k 109190.98k 109674.33k 109774.51k 109827.41k 109827.41k + +After (7.4x - 36.6x faster): + type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes 16384 bytes + SM4-CTR 781979.02k 2432994.28k 3437753.86k 3834177.88k 3963715.58k 3974556.33k + SM4-ECB 937590.69k 2941689.02k 3945751.81k 4328655.87k 4459181.40k 4468692.31k + SM4-CBC 890639.88k 1027746.58k 1050621.78k 1056696.66k 1058613.93k 1058701.31k + +Signed-off-by: Daniel Hu <Daniel.Hu@arm.com> + +Reviewed-by: Paul Dale <pauli@openssl.org> +Reviewed-by: Tomas Mraz <tomas@openssl.org> +(Merged from https://github.com/openssl/openssl/pull/17455) +--- + crypto/arm64cpuid.pl | 8 + + crypto/arm_arch.h | 1 + + crypto/armcap.c | 10 + + crypto/evp/e_sm4.c | 193 ++++-- + crypto/sm4/asm/sm4-armv8.pl | 635 ++++++++++++++++++ + crypto/sm4/build.info | 32 +- + include/crypto/sm4_platform.h | 48 ++ + .../implementations/ciphers/cipher_sm4.h | 1 + + .../ciphers/cipher_sm4_gcm_hw.c | 20 +- + .../implementations/ciphers/cipher_sm4_hw.c | 57 +- + 10 files changed, 945 insertions(+), 60 deletions(-) + create mode 100755 crypto/sm4/asm/sm4-armv8.pl + create mode 100644 include/crypto/sm4_platform.h + +diff --git a/crypto/arm64cpuid.pl b/crypto/arm64cpuid.pl +index 10d267b7ad..36af3e075b 100755 +--- a/crypto/arm64cpuid.pl ++++ b/crypto/arm64cpuid.pl +@@ -80,6 +80,14 @@ _armv8_pmull_probe: + ret + .size _armv8_pmull_probe,.-_armv8_pmull_probe + ++.globl _armv8_sm4_probe ++.type _armv8_sm4_probe,%function ++_armv8_sm4_probe: ++ AARCH64_VALID_CALL_TARGET ++ .long 0xcec08400 // sm4e v0.4s, v0.4s ++ ret ++.size _armv8_sm4_probe,.-_armv8_sm4_probe ++ + .globl _armv8_sha512_probe + .type _armv8_sha512_probe,%function + _armv8_sha512_probe: +diff --git a/crypto/arm_arch.h b/crypto/arm_arch.h +index c8b501f34c..5b5af31d92 100644 +--- a/crypto/arm_arch.h ++++ b/crypto/arm_arch.h +@@ -85,6 +85,7 @@ extern unsigned int OPENSSL_armv8_rsa_neonized; + # define ARMV8_CPUID (1<<7) + # define ARMV8_RNG (1<<8) + # define ARMV8_SM3 (1<<9) ++# define ARMV8_SM4 (1<<10) + + /* + * MIDR_EL1 system register +diff --git a/crypto/armcap.c b/crypto/armcap.c +index 365a48df45..c5aa062767 100644 +--- a/crypto/armcap.c ++++ b/crypto/armcap.c +@@ -53,6 +53,7 @@ void _armv8_sha256_probe(void); + void 
_armv8_pmull_probe(void); + # ifdef __aarch64__ + void _armv8_sm3_probe(void); ++void _armv8_sm4_probe(void); + void _armv8_sha512_probe(void); + unsigned int _armv8_cpuid_probe(void); + # endif +@@ -139,6 +140,7 @@ static unsigned long getauxval(unsigned long key) + # define HWCAP_CE_SHA256 (1 << 6) + # define HWCAP_CPUID (1 << 11) + # define HWCAP_CE_SM3 (1 << 18) ++# define HWCAP_CE_SM4 (1 << 19) + # define HWCAP_CE_SHA512 (1 << 21) + # endif + +@@ -207,6 +209,9 @@ void OPENSSL_cpuid_setup(void) + OPENSSL_armcap_P |= ARMV8_SHA256; + + # ifdef __aarch64__ ++ if (hwcap & HWCAP_CE_SM4) ++ OPENSSL_armcap_P |= ARMV8_SM4; ++ + if (hwcap & HWCAP_CE_SHA512) + OPENSSL_armcap_P |= ARMV8_SHA512; + +@@ -254,6 +259,11 @@ void OPENSSL_cpuid_setup(void) + OPENSSL_armcap_P |= ARMV8_SHA256; + } + # if defined(__aarch64__) && !defined(__APPLE__) ++ if (sigsetjmp(ill_jmp, 1) == 0) { ++ _armv8_sm4_probe(); ++ OPENSSL_armcap_P |= ARMV8_SM4; ++ } ++ + if (sigsetjmp(ill_jmp, 1) == 0) { + _armv8_sha512_probe(); + OPENSSL_armcap_P |= ARMV8_SHA512; +diff --git a/crypto/evp/e_sm4.c b/crypto/evp/e_sm4.c +index abd603015c..bff79ff197 100644 +--- a/crypto/evp/e_sm4.c ++++ b/crypto/evp/e_sm4.c +@@ -17,92 +17,187 @@ + # include <openssl/modes.h> + # include "crypto/sm4.h" + # include "crypto/evp.h" ++# include "crypto/sm4_platform.h" + # include "evp_local.h" + + typedef struct { +- SM4_KEY ks; ++ union { ++ OSSL_UNION_ALIGN; ++ SM4_KEY ks; ++ } ks; ++ block128_f block; ++ union { ++ ecb128_f ecb; ++ cbc128_f cbc; ++ ctr128_f ctr; ++ } stream; + } EVP_SM4_KEY; + ++# define BLOCK_CIPHER_generic(nid,blocksize,ivlen,nmode,mode,MODE,flags) \ ++static const EVP_CIPHER sm4_##mode = { \ ++ nid##_##nmode,blocksize,128/8,ivlen, \ ++ flags|EVP_CIPH_##MODE##_MODE, \ ++ EVP_ORIG_GLOBAL, \ ++ sm4_init_key, \ ++ sm4_##mode##_cipher, \ ++ NULL, \ ++ sizeof(EVP_SM4_KEY), \ ++ NULL,NULL,NULL,NULL }; \ ++const EVP_CIPHER *EVP_sm4_##mode(void) \ ++{ return &sm4_##mode; } ++ ++#define DEFINE_BLOCK_CIPHERS(nid,flags) \ ++ BLOCK_CIPHER_generic(nid,16,16,cbc,cbc,CBC,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ ++ BLOCK_CIPHER_generic(nid,16,0,ecb,ecb,ECB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ ++ BLOCK_CIPHER_generic(nid,1,16,ofb128,ofb,OFB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ ++ BLOCK_CIPHER_generic(nid,1,16,cfb128,cfb,CFB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ ++ BLOCK_CIPHER_generic(nid,1,16,ctr,ctr,CTR,flags) ++ + static int sm4_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc) + { +- ossl_sm4_set_key(key, EVP_CIPHER_CTX_get_cipher_data(ctx)); ++ int mode; ++ EVP_SM4_KEY *dat = EVP_C_DATA(EVP_SM4_KEY,ctx); ++ ++ mode = EVP_CIPHER_CTX_get_mode(ctx); ++ if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) ++ && !enc) { ++#ifdef HWSM4_CAPABLE ++ if (HWSM4_CAPABLE) { ++ HWSM4_set_decrypt_key(key, &dat->ks.ks); ++ dat->block = (block128_f) HWSM4_decrypt; ++ dat->stream.cbc = NULL; ++# ifdef HWSM4_cbc_encrypt ++ if (mode == EVP_CIPH_CBC_MODE) ++ dat->stream.cbc = (cbc128_f) HWSM4_cbc_encrypt; ++# endif ++# ifdef HWSM4_ecb_encrypt ++ if (mode == EVP_CIPH_ECB_MODE) ++ dat->stream.ecb = (ecb128_f) HWSM4_ecb_encrypt; ++# endif ++ } else ++#endif ++ { ++ dat->block = (block128_f) ossl_sm4_decrypt; ++ ossl_sm4_set_key(key, EVP_CIPHER_CTX_get_cipher_data(ctx)); ++ } ++ } else ++#ifdef HWSM4_CAPABLE ++ if (HWSM4_CAPABLE) { ++ HWSM4_set_encrypt_key(key, &dat->ks.ks); ++ dat->block = (block128_f) HWSM4_encrypt; ++ dat->stream.cbc = NULL; ++# ifdef HWSM4_cbc_encrypt ++ if (mode == EVP_CIPH_CBC_MODE) ++ dat->stream.cbc = 
(cbc128_f) HWSM4_cbc_encrypt; ++ else ++# endif ++# ifdef HWSM4_ecb_encrypt ++ if (mode == EVP_CIPH_ECB_MODE) ++ dat->stream.ecb = (ecb128_f) HWSM4_ecb_encrypt; ++ else ++# endif ++# ifdef HWSM4_ctr32_encrypt_blocks ++ if (mode == EVP_CIPH_CTR_MODE) ++ dat->stream.ctr = (ctr128_f) HWSM4_ctr32_encrypt_blocks; ++ else ++# endif ++ (void)0; /* terminate potentially open 'else' */ ++ } else ++#endif ++ { ++ dat->block = (block128_f) ossl_sm4_encrypt; ++ ossl_sm4_set_key(key, EVP_CIPHER_CTX_get_cipher_data(ctx)); ++ } + return 1; + } + +-static void sm4_cbc_encrypt(const unsigned char *in, unsigned char *out, +- size_t len, const SM4_KEY *key, +- unsigned char *ivec, const int enc) ++static int sm4_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, ++ const unsigned char *in, size_t len) + { +- if (enc) +- CRYPTO_cbc128_encrypt(in, out, len, key, ivec, +- (block128_f)ossl_sm4_encrypt); ++ EVP_SM4_KEY *dat = EVP_C_DATA(EVP_SM4_KEY,ctx); ++ ++ if (dat->stream.cbc) ++ (*dat->stream.cbc) (in, out, len, &dat->ks.ks, ctx->iv, ++ EVP_CIPHER_CTX_is_encrypting(ctx)); ++ else if (EVP_CIPHER_CTX_is_encrypting(ctx)) ++ CRYPTO_cbc128_encrypt(in, out, len, &dat->ks, ctx->iv, ++ dat->block); + else +- CRYPTO_cbc128_decrypt(in, out, len, key, ivec, +- (block128_f)ossl_sm4_decrypt); ++ CRYPTO_cbc128_decrypt(in, out, len, &dat->ks, ++ ctx->iv, dat->block); ++ return 1; + } + +-static void sm4_cfb128_encrypt(const unsigned char *in, unsigned char *out, +- size_t length, const SM4_KEY *key, +- unsigned char *ivec, int *num, const int enc) ++static int sm4_cfb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, ++ const unsigned char *in, size_t len) + { +- CRYPTO_cfb128_encrypt(in, out, length, key, ivec, num, enc, +- (block128_f)ossl_sm4_encrypt); ++ EVP_SM4_KEY *dat = EVP_C_DATA(EVP_SM4_KEY,ctx); ++ int num = EVP_CIPHER_CTX_get_num(ctx); ++ ++ CRYPTO_cfb128_encrypt(in, out, len, &dat->ks, ++ ctx->iv, &num, ++ EVP_CIPHER_CTX_is_encrypting(ctx), dat->block); ++ EVP_CIPHER_CTX_set_num(ctx, num); ++ return 1; + } + +-static void sm4_ecb_encrypt(const unsigned char *in, unsigned char *out, +- const SM4_KEY *key, const int enc) ++static int sm4_ecb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, ++ const unsigned char *in, size_t len) + { +- if (enc) +- ossl_sm4_encrypt(in, out, key); ++ size_t bl = EVP_CIPHER_CTX_get_block_size(ctx); ++ size_t i; ++ EVP_SM4_KEY *dat = EVP_C_DATA(EVP_SM4_KEY,ctx); ++ ++ if (len < bl) ++ return 1; ++ ++ if (dat->stream.ecb != NULL) ++ (*dat->stream.ecb) (in, out, len, &dat->ks.ks, ++ EVP_CIPHER_CTX_is_encrypting(ctx)); + else +- ossl_sm4_decrypt(in, out, key); ++ for (i = 0, len -= bl; i <= len; i += bl) ++ (*dat->block) (in + i, out + i, &dat->ks); ++ ++ return 1; + } + +-static void sm4_ofb128_encrypt(const unsigned char *in, unsigned char *out, +- size_t length, const SM4_KEY *key, +- unsigned char *ivec, int *num) ++static int sm4_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, ++ const unsigned char *in, size_t len) + { +- CRYPTO_ofb128_encrypt(in, out, length, key, ivec, num, +- (block128_f)ossl_sm4_encrypt); +-} ++ EVP_SM4_KEY *dat = EVP_C_DATA(EVP_SM4_KEY,ctx); ++ int num = EVP_CIPHER_CTX_get_num(ctx); + +-IMPLEMENT_BLOCK_CIPHER(sm4, ks, sm4, EVP_SM4_KEY, NID_sm4, +- 16, 16, 16, 128, EVP_CIPH_FLAG_DEFAULT_ASN1, +- sm4_init_key, 0, 0, 0, 0) ++ CRYPTO_ofb128_encrypt(in, out, len, &dat->ks, ++ ctx->iv, &num, dat->block); ++ EVP_CIPHER_CTX_set_num(ctx, num); ++ return 1; ++} + + static int sm4_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t 
len) + { + int n = EVP_CIPHER_CTX_get_num(ctx); + unsigned int num; +- EVP_SM4_KEY *dat = EVP_C_DATA(EVP_SM4_KEY, ctx); ++ EVP_SM4_KEY *dat = EVP_C_DATA(EVP_SM4_KEY,ctx); + + if (n < 0) + return 0; + num = (unsigned int)n; + +- CRYPTO_ctr128_encrypt(in, out, len, &dat->ks, ctx->iv, +- EVP_CIPHER_CTX_buf_noconst(ctx), &num, +- (block128_f)ossl_sm4_encrypt); ++ if (dat->stream.ctr) ++ CRYPTO_ctr128_encrypt_ctr32(in, out, len, &dat->ks, ++ ctx->iv, ++ EVP_CIPHER_CTX_buf_noconst(ctx), ++ &num, dat->stream.ctr); ++ else ++ CRYPTO_ctr128_encrypt(in, out, len, &dat->ks, ++ ctx->iv, ++ EVP_CIPHER_CTX_buf_noconst(ctx), &num, ++ dat->block); + EVP_CIPHER_CTX_set_num(ctx, num); + return 1; + } + +-static const EVP_CIPHER sm4_ctr_mode = { +- NID_sm4_ctr, 1, 16, 16, +- EVP_CIPH_CTR_MODE, +- EVP_ORIG_GLOBAL, +- sm4_init_key, +- sm4_ctr_cipher, +- NULL, +- sizeof(EVP_SM4_KEY), +- NULL, NULL, NULL, NULL +-}; +- +-const EVP_CIPHER *EVP_sm4_ctr(void) +-{ +- return &sm4_ctr_mode; +-} +- ++DEFINE_BLOCK_CIPHERS(NID_sm4, 0) + #endif +diff --git a/crypto/sm4/asm/sm4-armv8.pl b/crypto/sm4/asm/sm4-armv8.pl +new file mode 100755 +index 0000000000..7358a6e6a2 +--- /dev/null ++++ b/crypto/sm4/asm/sm4-armv8.pl +@@ -0,0 +1,635 @@ ++#! /usr/bin/env perl ++# Copyright 2022 The OpenSSL Project Authors. All Rights Reserved. ++# ++# Licensed under the Apache License 2.0 (the "License"). You may not use ++# this file except in compliance with the License. You can obtain a copy ++# in the file LICENSE in the source distribution or at ++# https://www.openssl.org/source/license.html ++ ++# ++# This module implements support for SM4 hw support on aarch64 ++# Oct 2021 ++# ++ ++# $output is the last argument if it looks like a file (it has an extension) ++# $flavour is the first argument if it doesn't look like a file ++$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef; ++$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? 
shift : undef; ++ ++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ++( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or ++( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or ++die "can't locate arm-xlate.pl"; ++ ++open OUT,"| \"$^X\" $xlate $flavour \"$output\"" ++ or die "can't call $xlate: $!"; ++*STDOUT=*OUT; ++ ++$prefix="sm4_v8"; ++my @rks=map("v$_",(0..7)); ++ ++sub rev32() { ++my $dst = shift; ++my $src = shift; ++$code.=<<___; ++#ifndef __ARMEB__ ++ rev32 $dst.16b,$src.16b ++#endif ++___ ++} ++ ++sub enc_blk () { ++my $data = shift; ++$code.=<<___; ++ sm4e $data.4s,@rks[0].4s ++ sm4e $data.4s,@rks[1].4s ++ sm4e $data.4s,@rks[2].4s ++ sm4e $data.4s,@rks[3].4s ++ sm4e $data.4s,@rks[4].4s ++ sm4e $data.4s,@rks[5].4s ++ sm4e $data.4s,@rks[6].4s ++ sm4e $data.4s,@rks[7].4s ++ rev64 $data.4S,$data.4S ++ ext $data.16b,$data.16b,$data.16b,#8 ++___ ++} ++ ++sub enc_4blks () { ++my $data0 = shift; ++my $data1 = shift; ++my $data2 = shift; ++my $data3 = shift; ++$code.=<<___; ++ sm4e $data0.4s,@rks[0].4s ++ sm4e $data1.4s,@rks[0].4s ++ sm4e $data2.4s,@rks[0].4s ++ sm4e $data3.4s,@rks[0].4s ++ ++ sm4e $data0.4s,@rks[1].4s ++ sm4e $data1.4s,@rks[1].4s ++ sm4e $data2.4s,@rks[1].4s ++ sm4e $data3.4s,@rks[1].4s ++ ++ sm4e $data0.4s,@rks[2].4s ++ sm4e $data1.4s,@rks[2].4s ++ sm4e $data2.4s,@rks[2].4s ++ sm4e $data3.4s,@rks[2].4s ++ ++ sm4e $data0.4s,@rks[3].4s ++ sm4e $data1.4s,@rks[3].4s ++ sm4e $data2.4s,@rks[3].4s ++ sm4e $data3.4s,@rks[3].4s ++ ++ sm4e $data0.4s,@rks[4].4s ++ sm4e $data1.4s,@rks[4].4s ++ sm4e $data2.4s,@rks[4].4s ++ sm4e $data3.4s,@rks[4].4s ++ ++ sm4e $data0.4s,@rks[5].4s ++ sm4e $data1.4s,@rks[5].4s ++ sm4e $data2.4s,@rks[5].4s ++ sm4e $data3.4s,@rks[5].4s ++ ++ sm4e $data0.4s,@rks[6].4s ++ sm4e $data1.4s,@rks[6].4s ++ sm4e $data2.4s,@rks[6].4s ++ sm4e $data3.4s,@rks[6].4s ++ ++ sm4e $data0.4s,@rks[7].4s ++ rev64 $data0.4S,$data0.4S ++ sm4e $data1.4s,@rks[7].4s ++ ext $data0.16b,$data0.16b,$data0.16b,#8 ++ rev64 $data1.4S,$data1.4S ++ sm4e $data2.4s,@rks[7].4s ++ ext $data1.16b,$data1.16b,$data1.16b,#8 ++ rev64 $data2.4S,$data2.4S ++ sm4e $data3.4s,@rks[7].4s ++ ext $data2.16b,$data2.16b,$data2.16b,#8 ++ rev64 $data3.4S,$data3.4S ++ ext $data3.16b,$data3.16b,$data3.16b,#8 ++___ ++} ++ ++$code=<<___; ++#include "arm_arch.h" ++.arch armv8-a+crypto ++.text ++___ ++ ++{{{ ++$code.=<<___; ++.align 6 ++.Lck: ++ .long 0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269 ++ .long 0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9 ++ .long 0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249 ++ .long 0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9 ++ .long 0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229 ++ .long 0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299 ++ .long 0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209 ++ .long 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279 ++.Lfk: ++ .long 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc ++___ ++}}} ++ ++{{{ ++my ($key,$keys)=("x0","x1"); ++my ($tmp)=("x2"); ++my ($key0,$key1,$key2,$key3,$key4,$key5,$key6,$key7)=map("v$_",(0..7)); ++my ($const0,$const1,$const2,$const3,$const4,$const5,$const6,$const7)=map("v$_",(16..23)); ++my ($fkconst) = ("v24"); ++$code.=<<___; ++.globl ${prefix}_set_encrypt_key ++.type ${prefix}_set_encrypt_key,%function ++.align 5 ++${prefix}_set_encrypt_key: ++ AARCH64_VALID_CALL_TARGET ++ ld1 {$key0.4s},[$key] ++ adr $tmp,.Lfk ++ ld1 {$fkconst.4s},[$tmp] ++ adr $tmp,.Lck ++ ld1 {$const0.4s,$const1.4s,$const2.4s,$const3.4s},[$tmp],64 ++___ ++ &rev32($key0, $key0); ++$code.=<<___; ++ ld1 
{$const4.4s,$const5.4s,$const6.4s,$const7.4s},[$tmp] ++ eor $key0.16b,$key0.16b,$fkconst.16b; ++ sm4ekey $key0.4S,$key0.4S,$const0.4S ++ sm4ekey $key1.4S,$key0.4S,$const1.4S ++ sm4ekey $key2.4S,$key1.4S,$const2.4S ++ sm4ekey $key3.4S,$key2.4S,$const3.4S ++ sm4ekey $key4.4S,$key3.4S,$const4.4S ++ st1 {$key0.4s,$key1.4s,$key2.4s,$key3.4s},[$keys],64 ++ sm4ekey $key5.4S,$key4.4S,$const5.4S ++ sm4ekey $key6.4S,$key5.4S,$const6.4S ++ sm4ekey $key7.4S,$key6.4S,$const7.4S ++ st1 {$key4.4s,$key5.4s,$key6.4s,$key7.4s},[$keys] ++ ret ++.size ${prefix}_set_encrypt_key,.-${prefix}_set_encrypt_key ++___ ++}}} ++ ++{{{ ++my ($key,$keys)=("x0","x1"); ++my ($tmp)=("x2"); ++my ($key7,$key6,$key5,$key4,$key3,$key2,$key1,$key0)=map("v$_",(0..7)); ++my ($const0,$const1,$const2,$const3,$const4,$const5,$const6,$const7)=map("v$_",(16..23)); ++my ($fkconst) = ("v24"); ++$code.=<<___; ++.globl ${prefix}_set_decrypt_key ++.type ${prefix}_set_decrypt_key,%function ++.align 5 ++${prefix}_set_decrypt_key: ++ AARCH64_VALID_CALL_TARGET ++ ld1 {$key0.4s},[$key] ++ adr $tmp,.Lfk ++ ld1 {$fkconst.4s},[$tmp] ++ adr $tmp, .Lck ++ ld1 {$const0.4s,$const1.4s,$const2.4s,$const3.4s},[$tmp],64 ++___ ++ &rev32($key0, $key0); ++$code.=<<___; ++ ld1 {$const4.4s,$const5.4s,$const6.4s,$const7.4s},[$tmp] ++ eor $key0.16b, $key0.16b,$fkconst.16b; ++ sm4ekey $key0.4S,$key0.4S,$const0.4S ++ sm4ekey $key1.4S,$key0.4S,$const1.4S ++ sm4ekey $key2.4S,$key1.4S,$const2.4S ++ rev64 $key0.4s,$key0.4s ++ rev64 $key1.4s,$key1.4s ++ ext $key0.16b,$key0.16b,$key0.16b,#8 ++ ext $key1.16b,$key1.16b,$key1.16b,#8 ++ sm4ekey $key3.4S,$key2.4S,$const3.4S ++ sm4ekey $key4.4S,$key3.4S,$const4.4S ++ rev64 $key2.4s,$key2.4s ++ rev64 $key3.4s,$key3.4s ++ ext $key2.16b,$key2.16b,$key2.16b,#8 ++ ext $key3.16b,$key3.16b,$key3.16b,#8 ++ sm4ekey $key5.4S,$key4.4S,$const5.4S ++ sm4ekey $key6.4S,$key5.4S,$const6.4S ++ rev64 $key4.4s,$key4.4s ++ rev64 $key5.4s,$key5.4s ++ ext $key4.16b,$key4.16b,$key4.16b,#8 ++ ext $key5.16b,$key5.16b,$key5.16b,#8 ++ sm4ekey $key7.4S,$key6.4S,$const7.4S ++ rev64 $key6.4s, $key6.4s ++ rev64 $key7.4s, $key7.4s ++ ext $key6.16b,$key6.16b,$key6.16b,#8 ++ ext $key7.16b,$key7.16b,$key7.16b,#8 ++ st1 {$key7.4s,$key6.4s,$key5.4s,$key4.4s},[$keys],64 ++ st1 {$key3.4s,$key2.4s,$key1.4s,$key0.4s},[$keys] ++ ret ++.size ${prefix}_set_decrypt_key,.-${prefix}_set_decrypt_key ++___ ++}}} ++ ++{{{ ++sub gen_block () { ++my $dir = shift; ++my ($inp,$out,$rk)=map("x$_",(0..2)); ++my ($data)=("v16"); ++$code.=<<___; ++.globl ${prefix}_${dir}crypt ++.type ${prefix}_${dir}crypt,%function ++.align 5 ++${prefix}_${dir}crypt: ++ AARCH64_VALID_CALL_TARGET ++ ld1 {$data.4s},[$inp] ++ ld1 {@rks[0].4s,@rks[1].4s,@rks[2].4s,@rks[3].4s},[$rk],64 ++ ld1 {@rks[4].4s,@rks[5].4s,@rks[6].4s,@rks[7].4s},[$rk] ++___ ++ &rev32($data,$data); ++ &enc_blk($data); ++ &rev32($data,$data); ++$code.=<<___; ++ st1 {$data.4s},[$out] ++ ret ++.size ${prefix}_${dir}crypt,.-${prefix}_${dir}crypt ++___ ++} ++ ++&gen_block("en"); ++&gen_block("de"); ++}}} ++ ++{{{ ++my ($inp,$out,$len,$rk)=map("x$_",(0..3)); ++my ($enc) = ("w4"); ++my @dat=map("v$_",(16..23)); ++$code.=<<___; ++.globl ${prefix}_ecb_encrypt ++.type ${prefix}_ecb_encrypt,%function ++.align 5 ++${prefix}_ecb_encrypt: ++ AARCH64_VALID_CALL_TARGET ++ ld1 {@rks[0].4s,@rks[1].4s,@rks[2].4s,@rks[3].4s},[$rk],#64 ++ ld1 {@rks[4].4s,@rks[5].4s,@rks[6].4s,@rks[7].4s},[$rk] ++1: ++ cmp $len,#64 ++ b.lt 1f ++ ld1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$inp],#64 ++ cmp $len,#128 ++ b.lt 2f ++ ld1 
{@dat[4].4s,@dat[5].4s,@dat[6].4s,@dat[7].4s},[$inp],#64 ++ // 8 blocks ++___ ++ &rev32(@dat[0],@dat[0]); ++ &rev32(@dat[1],@dat[1]); ++ &rev32(@dat[2],@dat[2]); ++ &rev32(@dat[3],@dat[3]); ++ &rev32(@dat[4],@dat[4]); ++ &rev32(@dat[5],@dat[5]); ++ &rev32(@dat[6],@dat[6]); ++ &rev32(@dat[7],@dat[7]); ++ &enc_4blks(@dat[0],@dat[1],@dat[2],@dat[3]); ++ &enc_4blks(@dat[4],@dat[5],@dat[6],@dat[7]); ++ &rev32(@dat[0],@dat[0]); ++ &rev32(@dat[1],@dat[1]); ++ &rev32(@dat[2],@dat[2]); ++ &rev32(@dat[3],@dat[3]); ++ &rev32(@dat[4],@dat[4]); ++ &rev32(@dat[5],@dat[5]); ++$code.=<<___; ++ st1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$out],#64 ++___ ++ &rev32(@dat[6],@dat[6]); ++ &rev32(@dat[7],@dat[7]); ++$code.=<<___; ++ st1 {@dat[4].4s,@dat[5].4s,@dat[6].4s,@dat[7].4s},[$out],#64 ++ subs $len,$len,#128 ++ b.gt 1b ++ ret ++ // 4 blocks ++2: ++___ ++ &rev32(@dat[0],@dat[0]); ++ &rev32(@dat[1],@dat[1]); ++ &rev32(@dat[2],@dat[2]); ++ &rev32(@dat[3],@dat[3]); ++ &enc_4blks(@dat[0],@dat[1],@dat[2],@dat[3]); ++ &rev32(@dat[0],@dat[0]); ++ &rev32(@dat[1],@dat[1]); ++ &rev32(@dat[2],@dat[2]); ++ &rev32(@dat[3],@dat[3]); ++$code.=<<___; ++ st1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$out],#64 ++ subs $len,$len,#64 ++ b.gt 1b ++1: ++ subs $len,$len,#16 ++ b.lt 1f ++ ld1 {@dat[0].4s},[$inp],#16 ++___ ++ &rev32(@dat[0],@dat[0]); ++ &enc_blk(@dat[0]); ++ &rev32(@dat[0],@dat[0]); ++$code.=<<___; ++ st1 {@dat[0].4s},[$out],#16 ++ b.ne 1b ++1: ++ ret ++.size ${prefix}_ecb_encrypt,.-${prefix}_ecb_encrypt ++___ ++}}} ++ ++{{{ ++my ($inp,$out,$len,$rk,$ivp)=map("x$_",(0..4)); ++my ($enc) = ("w5"); ++my @dat=map("v$_",(16..23)); ++my @in=map("v$_",(24..31)); ++my ($ivec) = ("v8"); ++$code.=<<___; ++.globl ${prefix}_cbc_encrypt ++.type ${prefix}_cbc_encrypt,%function ++.align 5 ++${prefix}_cbc_encrypt: ++ AARCH64_VALID_CALL_TARGET ++ stp d8,d9,[sp, #-16]! 
++ ++ ld1 {@rks[0].4s,@rks[1].4s,@rks[2].4s,@rks[3].4s},[$rk],#64 ++ ld1 {@rks[4].4s,@rks[5].4s,@rks[6].4s,@rks[7].4s},[$rk] ++ ld1 {$ivec.4s},[$ivp] ++ cmp $enc,#0 ++ b.eq .Ldec ++1: ++ cmp $len, #64 ++ b.lt 1f ++ ld1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$inp],#64 ++ eor @dat[0].16b,@dat[0].16b,$ivec.16b ++___ ++ &rev32(@dat[1],@dat[1]); ++ &rev32(@dat[0],@dat[0]); ++ &rev32(@dat[2],@dat[2]); ++ &rev32(@dat[3],@dat[3]); ++ &enc_blk(@dat[0]); ++$code.=<<___; ++ eor @dat[1].16b,@dat[1].16b,@dat[0].16b ++___ ++ &enc_blk(@dat[1]); ++ &rev32(@dat[0],@dat[0]); ++$code.=<<___; ++ eor @dat[2].16b,@dat[2].16b,@dat[1].16b ++___ ++ &enc_blk(@dat[2]); ++ &rev32(@dat[1],@dat[1]); ++$code.=<<___; ++ eor @dat[3].16b,@dat[3].16b,@dat[2].16b ++___ ++ &enc_blk(@dat[3]); ++ &rev32(@dat[2],@dat[2]); ++ &rev32(@dat[3],@dat[3]); ++$code.=<<___; ++ mov $ivec.16b,@dat[3].16b ++ st1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$out],#64 ++ subs $len,$len,#64 ++ b.ne 1b ++1: ++ subs $len,$len,#16 ++ b.lt 3f ++ ld1 {@dat[0].4s},[$inp],#16 ++ eor $ivec.16b,$ivec.16b,@dat[0].16b ++___ ++ &rev32($ivec,$ivec); ++ &enc_blk($ivec); ++ &rev32($ivec,$ivec); ++$code.=<<___; ++ st1 {$ivec.16b},[$out],#16 ++ b.ne 1b ++ b 3f ++.Ldec: ++1: ++ cmp $len, #64 ++ b.lt 1f ++ ld1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$inp] ++ ld1 {@in[0].4s,@in[1].4s,@in[2].4s,@in[3].4s},[$inp],#64 ++ cmp $len,#128 ++ b.lt 2f ++ // 8 blocks mode ++ ld1 {@dat[4].4s,@dat[5].4s,@dat[6].4s,@dat[7].4s},[$inp] ++ ld1 {@in[4].4s,@in[5].4s,@in[6].4s,@in[7].4s},[$inp],#64 ++___ ++ &rev32(@dat[0],@dat[0]); ++ &rev32(@dat[1],@dat[1]); ++ &rev32(@dat[2],@dat[2]); ++ &rev32(@dat[3],$dat[3]); ++ &rev32(@dat[4],@dat[4]); ++ &rev32(@dat[5],@dat[5]); ++ &rev32(@dat[6],@dat[6]); ++ &rev32(@dat[7],$dat[7]); ++ &enc_4blks(@dat[0],@dat[1],@dat[2],@dat[3]); ++ &enc_4blks(@dat[4],@dat[5],@dat[6],@dat[7]); ++ &rev32(@dat[0],@dat[0]); ++ &rev32(@dat[1],@dat[1]); ++ &rev32(@dat[2],@dat[2]); ++ &rev32(@dat[3],@dat[3]); ++ &rev32(@dat[4],@dat[4]); ++ &rev32(@dat[5],@dat[5]); ++ &rev32(@dat[6],@dat[6]); ++ &rev32(@dat[7],@dat[7]); ++$code.=<<___; ++ eor @dat[0].16b,@dat[0].16b,$ivec.16b ++ eor @dat[1].16b,@dat[1].16b,@in[0].16b ++ eor @dat[2].16b,@dat[2].16b,@in[1].16b ++ mov $ivec.16b,@in[7].16b ++ eor @dat[3].16b,$dat[3].16b,@in[2].16b ++ eor @dat[4].16b,$dat[4].16b,@in[3].16b ++ eor @dat[5].16b,$dat[5].16b,@in[4].16b ++ eor @dat[6].16b,$dat[6].16b,@in[5].16b ++ eor @dat[7].16b,$dat[7].16b,@in[6].16b ++ st1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$out],#64 ++ st1 {@dat[4].4s,@dat[5].4s,@dat[6].4s,@dat[7].4s},[$out],#64 ++ subs $len,$len,128 ++ b.gt 1b ++ b 3f ++ // 4 blocks mode ++2: ++___ ++ &rev32(@dat[0],@dat[0]); ++ &rev32(@dat[1],@dat[1]); ++ &rev32(@dat[2],@dat[2]); ++ &rev32(@dat[3],$dat[3]); ++ &enc_4blks(@dat[0],@dat[1],@dat[2],@dat[3]); ++ &rev32(@dat[0],@dat[0]); ++ &rev32(@dat[1],@dat[1]); ++ &rev32(@dat[2],@dat[2]); ++ &rev32(@dat[3],@dat[3]); ++$code.=<<___; ++ eor @dat[0].16b,@dat[0].16b,$ivec.16b ++ eor @dat[1].16b,@dat[1].16b,@in[0].16b ++ mov $ivec.16b,@in[3].16b ++ eor @dat[2].16b,@dat[2].16b,@in[1].16b ++ eor @dat[3].16b,$dat[3].16b,@in[2].16b ++ st1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$out],#64 ++ subs $len,$len,#64 ++ b.gt 1b ++1: ++ subs $len,$len,#16 ++ b.lt 3f ++ ld1 {@dat[0].4s},[$inp],#16 ++ mov @in[0].16b,@dat[0].16b ++___ ++ &rev32(@dat[0],@dat[0]); ++ &enc_blk(@dat[0]); ++ &rev32(@dat[0],@dat[0]); ++$code.=<<___; ++ eor @dat[0].16b,@dat[0].16b,$ivec.16b ++ mov $ivec.16b,@in[0].16b ++ st1 
{@dat[0].16b},[$out],#16 ++ b.ne 1b ++3: ++ // save back IV ++ st1 {$ivec.16b},[$ivp] ++ ldp d8,d9,[sp],#16 ++ ret ++.size ${prefix}_cbc_encrypt,.-${prefix}_cbc_encrypt ++___ ++}}} ++ ++{{{ ++my ($inp,$out,$len,$rk,$ivp)=map("x$_",(0..4)); ++my ($ctr)=("w5"); ++my @dat=map("v$_",(16..23)); ++my @in=map("v$_",(24..31)); ++my ($ivec)=("v8"); ++$code.=<<___; ++.globl ${prefix}_ctr32_encrypt_blocks ++.type ${prefix}_ctr32_encrypt_blocks,%function ++.align 5 ++${prefix}_ctr32_encrypt_blocks: ++ AARCH64_VALID_CALL_TARGET ++ stp d8,d9,[sp, #-16]! ++ ++ ld1 {$ivec.4s},[$ivp] ++ ld1 {@rks[0].4s,@rks[1].4s,@rks[2].4s,@rks[3].4s},[$rk],64 ++ ld1 {@rks[4].4s,@rks[5].4s,@rks[6].4s,@rks[7].4s},[$rk] ++___ ++ &rev32($ivec,$ivec); ++$code.=<<___; ++ mov $ctr,$ivec.s[3] ++1: ++ cmp $len,#4 ++ b.lt 1f ++ ld1 {@in[0].4s,@in[1].4s,@in[2].4s,@in[3].4s},[$inp],#64 ++ mov @dat[0].16b,$ivec.16b ++ mov @dat[1].16b,$ivec.16b ++ mov @dat[2].16b,$ivec.16b ++ mov @dat[3].16b,$ivec.16b ++ add $ctr,$ctr,#1 ++ mov $dat[1].s[3],$ctr ++ add $ctr,$ctr,#1 ++ mov @dat[2].s[3],$ctr ++ add $ctr,$ctr,#1 ++ mov @dat[3].s[3],$ctr ++ cmp $len,#8 ++ b.lt 2f ++ ld1 {@in[4].4s,@in[5].4s,@in[6].4s,@in[7].4s},[$inp],#64 ++ mov @dat[4].16b,$ivec.16b ++ mov @dat[5].16b,$ivec.16b ++ mov @dat[6].16b,$ivec.16b ++ mov @dat[7].16b,$ivec.16b ++ add $ctr,$ctr,#1 ++ mov $dat[4].s[3],$ctr ++ add $ctr,$ctr,#1 ++ mov @dat[5].s[3],$ctr ++ add $ctr,$ctr,#1 ++ mov @dat[6].s[3],$ctr ++ add $ctr,$ctr,#1 ++ mov @dat[7].s[3],$ctr ++___ ++ &enc_4blks(@dat[0],@dat[1],@dat[2],@dat[3]); ++ &enc_4blks(@dat[4],@dat[5],@dat[6],@dat[7]); ++ &rev32(@dat[0],@dat[0]); ++ &rev32(@dat[1],@dat[1]); ++ &rev32(@dat[2],@dat[2]); ++ &rev32(@dat[3],@dat[3]); ++ &rev32(@dat[4],@dat[4]); ++ &rev32(@dat[5],@dat[5]); ++ &rev32(@dat[6],@dat[6]); ++ &rev32(@dat[7],@dat[7]); ++$code.=<<___; ++ eor @dat[0].16b,@dat[0].16b,@in[0].16b ++ eor @dat[1].16b,@dat[1].16b,@in[1].16b ++ eor @dat[2].16b,@dat[2].16b,@in[2].16b ++ eor @dat[3].16b,@dat[3].16b,@in[3].16b ++ eor @dat[4].16b,@dat[4].16b,@in[4].16b ++ eor @dat[5].16b,@dat[5].16b,@in[5].16b ++ eor @dat[6].16b,@dat[6].16b,@in[6].16b ++ eor @dat[7].16b,@dat[7].16b,@in[7].16b ++ st1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$out],#64 ++ st1 {@dat[4].4s,@dat[5].4s,@dat[6].4s,@dat[7].4s},[$out],#64 ++ subs $len,$len,#8 ++ b.eq 3f ++ add $ctr,$ctr,#1 ++ mov $ivec.s[3],$ctr ++ b 1b ++2: ++___ ++ &enc_4blks(@dat[0],@dat[1],@dat[2],@dat[3]); ++ &rev32(@dat[0],@dat[0]); ++ &rev32(@dat[1],@dat[1]); ++ &rev32(@dat[2],@dat[2]); ++ &rev32(@dat[3],@dat[3]); ++$code.=<<___; ++ eor @dat[0].16b,@dat[0].16b,@in[0].16b ++ eor @dat[1].16b,@dat[1].16b,@in[1].16b ++ eor @dat[2].16b,@dat[2].16b,@in[2].16b ++ eor @dat[3].16b,@dat[3].16b,@in[3].16b ++ st1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$out],#64 ++ subs $len,$len,#4 ++ b.eq 3f ++ add $ctr,$ctr,#1 ++ mov $ivec.s[3],$ctr ++ b 1b ++1: ++ subs $len,$len,#1 ++ b.lt 3f ++ mov $dat[0].16b,$ivec.16b ++ ld1 {@in[0].4s},[$inp],#16 ++___ ++ &enc_blk(@dat[0]); ++ &rev32(@dat[0],@dat[0]); ++$code.=<<___; ++ eor $dat[0].16b,$dat[0].16b,@in[0].16b ++ st1 {$dat[0].4s},[$out],#16 ++ b.eq 3f ++ add $ctr,$ctr,#1 ++ mov $ivec.s[3],$ctr ++ b 1b ++3: ++ ldp d8,d9,[sp],#16 ++ ret ++.size ${prefix}_ctr32_encrypt_blocks,.-${prefix}_ctr32_encrypt_blocks ++___ ++}}} ++######################################## ++{ my %opcode = ( ++ "sm4e" => 0xcec08400, ++ "sm4ekey" => 0xce60c800); ++ ++ sub unsm4 { ++ my ($mnemonic,$arg)=@_; ++ ++ $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o ++ && ++ 
sprintf ".inst\t0x%08x\t//%s %s", ++ $opcode{$mnemonic}|$1|($2<<5)|($3<<16), ++ $mnemonic,$arg; ++ } ++} ++ ++open SELF,$0; ++while(<SELF>) { ++ next if (/^#!/); ++ last if (!s/^#/\/\// and !/^$/); ++ print; ++} ++close SELF; ++ ++foreach(split("\n",$code)) { ++ s/\`([^\`]*)\`/eval($1)/ge; ++ ++ s/\b(sm4\w+)\s+([qv].*)/unsm4($1,$2)/ge; ++ print $_,"\n"; ++} ++ ++close STDOUT or die "error closing STDOUT: $!"; +diff --git a/crypto/sm4/build.info b/crypto/sm4/build.info +index b65a7d149e..e27aa49e67 100644 +--- a/crypto/sm4/build.info ++++ b/crypto/sm4/build.info +@@ -1,4 +1,32 @@ + LIBS=../../libcrypto +-SOURCE[../../libcrypto]=\ +- sm4.c + ++IF[{- !$disabled{asm} -}] ++ $SM4DEF_aarch64=SM4_ASM ++ $SM4ASM_aarch64=sm4-armv8.S ++ ++ # Now that we have defined all the arch specific variables, use the ++ # appropriate one, and define the appropriate macros ++ IF[$SM4ASM_{- $target{asm_arch} -}] ++ $SM4ASM=$SM4ASM_{- $target{asm_arch} -} ++ $SM4DEF=$SM4DEF_{- $target{asm_arch} -} ++ ENDIF ++ENDIF ++ ++SOURCE[../../libcrypto]= $SM4ASM sm4.c ++ ++ ++# Implementations are now spread across several libraries, so the defines ++# need to be applied to all affected libraries and modules. ++DEFINE[../../libcrypto]=$SM4DEF ++DEFINE[../../providers/libfips.a]=$SM4DEF ++DEFINE[../../providers/libdefault.a]=$SM4DEF ++# We only need to include the SM4DEF stuff in the legacy provider when it's a ++# separate module and it's dynamically linked with libcrypto. Otherwise, it ++# already gets everything that the static libcrypto.a has, and doesn't need it ++# added again. ++IF[{- !$disabled{module} && !$disabled{shared} -}] ++ DEFINE[../providers/liblegacy.a]=$SM4DEF ++ENDIF ++ ++GENERATE[sm4-armv8.S]=asm/sm4-armv8.pl ++INCLUDE[sm4-armv8.o]=.. +diff --git a/include/crypto/sm4_platform.h b/include/crypto/sm4_platform.h +new file mode 100644 +index 0000000000..42c8b44a43 +--- /dev/null ++++ b/include/crypto/sm4_platform.h +@@ -0,0 +1,48 @@ ++/* ++ * Copyright 2022 The OpenSSL Project Authors. All Rights Reserved. ++ * ++ * Licensed under the Apache License 2.0 (the "License"). You may not use ++ * this file except in compliance with the License. 
You can obtain a copy ++ * in the file LICENSE in the source distribution or at ++ * https://www.openssl.org/source/license.html ++ */ ++ ++#ifndef OSSL_SM4_PLATFORM_H ++# define OSSL_SM4_PLATFORM_H ++# pragma once ++ ++# if defined(OPENSSL_CPUID_OBJ) ++# if (defined(__arm__) || defined(__arm) || defined(__aarch64__)) ++# include "arm_arch.h" ++# if __ARM_MAX_ARCH__>=8 ++# define HWSM4_CAPABLE (OPENSSL_armcap_P & ARMV8_SM4) ++# define HWSM4_set_encrypt_key sm4_v8_set_encrypt_key ++# define HWSM4_set_decrypt_key sm4_v8_set_decrypt_key ++# define HWSM4_encrypt sm4_v8_encrypt ++# define HWSM4_decrypt sm4_v8_decrypt ++# define HWSM4_cbc_encrypt sm4_v8_cbc_encrypt ++# define HWSM4_ecb_encrypt sm4_v8_ecb_encrypt ++# define HWSM4_ctr32_encrypt_blocks sm4_v8_ctr32_encrypt_blocks ++# endif ++# endif ++# endif /* OPENSSL_CPUID_OBJ */ ++ ++# if defined(HWSM4_CAPABLE) ++int HWSM4_set_encrypt_key(const unsigned char *userKey, SM4_KEY *key); ++int HWSM4_set_decrypt_key(const unsigned char *userKey, SM4_KEY *key); ++void HWSM4_encrypt(const unsigned char *in, unsigned char *out, ++ const SM4_KEY *key); ++void HWSM4_decrypt(const unsigned char *in, unsigned char *out, ++ const SM4_KEY *key); ++void HWSM4_cbc_encrypt(const unsigned char *in, unsigned char *out, ++ size_t length, const SM4_KEY *key, ++ unsigned char *ivec, const int enc); ++void HWSM4_ecb_encrypt(const unsigned char *in, unsigned char *out, ++ size_t length, const SM4_KEY *key, ++ const int enc); ++void HWSM4_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out, ++ size_t len, const void *key, ++ const unsigned char ivec[16]); ++# endif /* HWSM4_CAPABLE */ ++ ++#endif /* OSSL_SM4_PLATFORM_H */ +diff --git a/providers/implementations/ciphers/cipher_sm4.h b/providers/implementations/ciphers/cipher_sm4.h +index f7f833fcb4..01a031a74d 100644 +--- a/providers/implementations/ciphers/cipher_sm4.h ++++ b/providers/implementations/ciphers/cipher_sm4.h +@@ -9,6 +9,7 @@ + + #include "prov/ciphercommon.h" + #include "crypto/sm4.h" ++#include "crypto/sm4_platform.h" + + typedef struct prov_cast_ctx_st { + PROV_CIPHER_CTX base; /* Must be first */ +diff --git a/providers/implementations/ciphers/cipher_sm4_gcm_hw.c b/providers/implementations/ciphers/cipher_sm4_gcm_hw.c +index 6bcd1ec406..c0c9b22bd3 100644 +--- a/providers/implementations/ciphers/cipher_sm4_gcm_hw.c ++++ b/providers/implementations/ciphers/cipher_sm4_gcm_hw.c +@@ -12,6 +12,7 @@ + */ + + #include "cipher_sm4_gcm.h" ++#include "crypto/sm4_platform.h" + + static int sm4_gcm_initkey(PROV_GCM_CTX *ctx, const unsigned char *key, + size_t keylen) +@@ -20,9 +21,22 @@ static int sm4_gcm_initkey(PROV_GCM_CTX *ctx, const unsigned char *key, + SM4_KEY *ks = &actx->ks.ks; + + ctx->ks = ks; +- ossl_sm4_set_key(key, ks); +- CRYPTO_gcm128_init(&ctx->gcm, ks, (block128_f)ossl_sm4_encrypt); +- ctx->ctr = (ctr128_f)NULL; ++# ifdef HWSM4_CAPABLE ++ if (HWSM4_CAPABLE) { ++ HWSM4_set_encrypt_key(key, ks); ++ CRYPTO_gcm128_init(&ctx->gcm, ks, (block128_f) HWSM4_encrypt); ++# ifdef HWSM4_ctr32_encrypt_blocks ++ ctx->ctr = (ctr128_f) HWSM4_ctr32_encrypt_blocks; ++# else /* HWSM4_ctr32_encrypt_blocks */ ++ ctx->ctr = (ctr128_f)NULL; ++# endif ++ } else ++# endif /* HWSM4_CAPABLE */ ++ { ++ ossl_sm4_set_key(key, ks); ++ CRYPTO_gcm128_init(&ctx->gcm, ks, (block128_f)ossl_sm4_encrypt); ++ ctx->ctr = (ctr128_f)NULL; ++ } + ctx->key_set = 1; + + return 1; +diff --git a/providers/implementations/ciphers/cipher_sm4_hw.c b/providers/implementations/ciphers/cipher_sm4_hw.c +index 0db04b1a74..4cd3d3d669 100644 
+--- a/providers/implementations/ciphers/cipher_sm4_hw.c ++++ b/providers/implementations/ciphers/cipher_sm4_hw.c +@@ -15,14 +15,59 @@ static int cipher_hw_sm4_initkey(PROV_CIPHER_CTX *ctx, + PROV_SM4_CTX *sctx = (PROV_SM4_CTX *)ctx; + SM4_KEY *ks = &sctx->ks.ks; + +- ossl_sm4_set_key(key, ks); + ctx->ks = ks; + if (ctx->enc + || (ctx->mode != EVP_CIPH_ECB_MODE +- && ctx->mode != EVP_CIPH_CBC_MODE)) +- ctx->block = (block128_f)ossl_sm4_encrypt; +- else +- ctx->block = (block128_f)ossl_sm4_decrypt; ++ && ctx->mode != EVP_CIPH_CBC_MODE)) { ++#ifdef HWSM4_CAPABLE ++ if (HWSM4_CAPABLE) { ++ HWSM4_set_encrypt_key(key, ks); ++ ctx->block = (block128_f)HWSM4_encrypt; ++ ctx->stream.cbc = NULL; ++#ifdef HWSM4_cbc_encrypt ++ if (ctx->mode == EVP_CIPH_CBC_MODE) ++ ctx->stream.cbc = (cbc128_f)HWSM4_cbc_encrypt; ++ else ++#endif ++#ifdef HWSM4_ecb_encrypt ++ if (ctx->mode == EVP_CIPH_ECB_MODE) ++ ctx->stream.ecb = (ecb128_f)HWSM4_ecb_encrypt; ++ else ++#endif ++#ifdef HWSM4_ctr32_encrypt_blocks ++ if (ctx->mode == EVP_CIPH_CTR_MODE) ++ ctx->stream.ctr = (ctr128_f)HWSM4_ctr32_encrypt_blocks; ++ else ++#endif ++ (void)0; /* terminate potentially open 'else' */ ++ } else ++#endif ++ { ++ ossl_sm4_set_key(key, ks); ++ ctx->block = (block128_f)ossl_sm4_encrypt; ++ } ++ } else { ++#ifdef HWSM4_CAPABLE ++ if (HWSM4_CAPABLE) { ++ HWSM4_set_decrypt_key(key, ks); ++ ctx->block = (block128_f)HWSM4_decrypt; ++ ctx->stream.cbc = NULL; ++#ifdef HWSM4_cbc_encrypt ++ if (ctx->mode == EVP_CIPH_CBC_MODE) ++ ctx->stream.cbc = (cbc128_f)HWSM4_cbc_encrypt; ++#endif ++#ifdef HWSM4_ecb_encrypt ++ if (ctx->mode == EVP_CIPH_ECB_MODE) ++ ctx->stream.ecb = (ecb128_f)HWSM4_ecb_encrypt; ++#endif ++ } else ++#endif ++ { ++ ossl_sm4_set_key(key, ks); ++ ctx->block = (block128_f)ossl_sm4_decrypt; ++ } ++ } ++ + return 1; + } + +@@ -31,7 +76,7 @@ IMPLEMENT_CIPHER_HW_COPYCTX(cipher_hw_sm4_copyctx, PROV_SM4_CTX) + # define PROV_CIPHER_HW_sm4_mode(mode) \ + static const PROV_CIPHER_HW sm4_##mode = { \ + cipher_hw_sm4_initkey, \ +- ossl_cipher_hw_chunked_##mode, \ ++ ossl_cipher_hw_generic_##mode, \ + cipher_hw_sm4_copyctx \ + }; \ + const PROV_CIPHER_HW *ossl_prov_cipher_hw_sm4_##mode(size_t keybits) \ +-- +2.37.3.windows.1 + |
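A note on the detection side: the crypto/armcap.c hunk learns about SM4 support either from the kernel (AT_HWCAP bit 19, HWCAP_CE_SM4) or, where getauxval() is unavailable, by executing the probe routine _armv8_sm4_probe under a SIGILL handler. A self-contained sketch of the getauxval() path, valid on Linux/AArch64 only; the main() wrapper is illustrative, and the bit value is the one the patch defines:

/* Linux/AArch64 illustration of the HWCAP path from crypto/armcap.c.
 * HWCAP_CE_SM4 matches the patch; main() is only for demonstration. */
#include <stdio.h>
#include <sys/auxv.h>                  /* getauxval, AT_HWCAP */

#define HWCAP_CE_SM4 (1 << 19)         /* FEAT_SM4 bit in AT_HWCAP */

int main(void)
{
    unsigned long hwcap = getauxval(AT_HWCAP);

    printf("SM4 instructions: %savailable\n",
           (hwcap & HWCAP_CE_SM4) ? "" : "not ");
    return 0;
}

The SIGILL route covers systems without reliable HWCAP reporting: it simply executes sm4e v0.4s, v0.4s (encoded as .long 0xcec08400 in arm64cpuid.pl) and treats a trap as absence of the feature.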
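The performance split in the commit message (CTR/ECB/CBC-decrypt near 40x, CBC encrypt near 8x) follows directly from the CBC recurrence C_i = E_K(P_i XOR C_{i-1}): each ciphertext block is an input to the next, so encryption cannot batch 4 or 8 blocks per loop iteration the way the other modes do. A minimal reference loop over a generic 16-byte block function; demo_cbc_encrypt is a hypothetical name, not the patch's entry point:

/* Why CBC encryption gains least ("inter-block dependency" in the
 * commit message): each ciphertext block feeds the next, so blocks
 * must be produced serially. Minimal sketch, no padding handling. */
#include <string.h>

typedef void (*block128_f)(const unsigned char in[16],
                           unsigned char out[16], const void *key);

static void demo_cbc_encrypt(const unsigned char *in, unsigned char *out,
                             size_t nblocks, const void *key,
                             unsigned char iv[16], block128_f block)
{
    unsigned char x[16];
    size_t i, j;

    for (i = 0; i < nblocks; i++) {        /* serial in i */
        for (j = 0; j < 16; j++)
            x[j] = in[16 * i + j] ^ iv[j]; /* P_i XOR C_{i-1} */
        block(x, out + 16 * i, key);       /* C_i = E_K(P_i ^ C_{i-1}) */
        memcpy(iv, out + 16 * i, 16);      /* chain into next block */
    }
}

CBC decryption escapes the serialization because every E_K^-1(C_i) can be computed in parallel before the XOR with C_{i-1}, which is exactly what the 8-block .Ldec path of sm4_v8_cbc_encrypt exploits.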
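The .Lck/.Lfk tables in sm4-armv8.pl are the standard SM4 system parameters: FK is the fixed whitening constant from the SM4 specification, and CK is generated by ck[i][j] = (4*i + j) * 7 mod 256, packed big-endian into 32 words. A small sketch, assuming that generation rule, which regenerates the CK table for cross-checking against the assembly source:

/* Regenerate the .Lck constants from sm4-armv8.pl: the j-th byte of
 * word i is (4*i + j) * 7 mod 256, packed big-endian. Output matches
 * the eight .long rows in the patch (e.g. 0x00070E15 ... 0x646B7279). */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
    for (int i = 0; i < 32; i++) {
        uint32_t w = 0;
        for (int j = 0; j < 4; j++)
            w = (w << 8) | (uint8_t)((4 * i + j) * 7);
        printf("0x%08X%s", (unsigned)w, (i % 4 == 3) ? "\n" : ", ");
    }
    return 0;
}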
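Finally, the acceleration is transparent at the API level: DEFINE_BLOCK_CIPHERS registers the same EVP_sm4_*() ciphers as before, and the hardware path is chosen inside sm4_init_key(). A usage sketch against the public EVP interface; the EVP calls are real OpenSSL API, while the demo_sm4_ctr wrapper is hypothetical and error handling is trimmed for brevity:

/* SM4-CTR through the public EVP API; the sm4_v8_* code path is
 * selected automatically when ARMV8_SM4 is set in OPENSSL_armcap_P.
 * Returns the number of output bytes, or 0 on allocation failure. */
#include <openssl/evp.h>

int demo_sm4_ctr(const unsigned char key[16], const unsigned char iv[16],
                 const unsigned char *in, int inlen, unsigned char *out)
{
    EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new();
    int outl = 0, tmplen = 0;

    if (ctx == NULL)
        return 0;
    if (EVP_EncryptInit_ex(ctx, EVP_sm4_ctr(), NULL, key, iv) == 1
            && EVP_EncryptUpdate(ctx, out, &outl, in, inlen) == 1
            && EVP_EncryptFinal_ex(ctx, out + outl, &tmplen) == 1)
        outl += tmplen;
    EVP_CIPHER_CTX_free(ctx);
    return outl;
}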