diff options
author | CoprDistGit <infra@openeuler.org> | 2023-08-19 12:42:12 +0000 |
---|---|---|
committer | CoprDistGit <infra@openeuler.org> | 2023-08-19 12:42:12 +0000 |
commit | 4171d506f218bcfaa1c8d40b36596abfab644899 (patch) | |
tree | 0e62eb269ac2671efc121d54075ddd016bfe4db3 /zlib-Optimize-CRC32.patch | |
parent | ec1a16522fc9c54ca26742f73b94eab08a38f25d (diff) |
automatic import of zlib
Diffstat (limited to 'zlib-Optimize-CRC32.patch')
-rw-r--r-- | zlib-Optimize-CRC32.patch | 94 |
1 files changed, 94 insertions, 0 deletions
diff --git a/zlib-Optimize-CRC32.patch b/zlib-Optimize-CRC32.patch new file mode 100644 index 0000000..c0495a6 --- /dev/null +++ b/zlib-Optimize-CRC32.patch @@ -0,0 +1,94 @@ +From 8935175266e343ac1d52106e2e790810b54f26c1 Mon Sep 17 00:00:00 2001 +From: liqiang64 <liqiang64@huawei.com> +Date: Tue, 3 Dec 2019 03:22:00 +0000 +Subject: [PATCH] zlib: Optimize CRC32 + +This patch uses the NEON instruction set to optimize the CRC32 +algorithm. + +On the ARM architecture, we can optimize the efficiency of +crc32 through the interface provided by the neon instruction +set. +Modify by Li Qiang. +--- + crc32.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 50 insertions(+) + +diff --git a/crc32.c b/crc32.c +index f8357b0..5c53068 100644 +--- a/crc32.c ++++ b/crc32.c +@@ -28,6 +28,9 @@ + #endif /* MAKECRCH */ + + #include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */ ++#ifdef __aarch64__ ++#include "arm_acle.h" ++#endif + + /* + A CRC of a message is computed on N braids of words in the message, where +@@ -600,6 +603,49 @@ const z_crc_t FAR * ZEXPORT get_crc_table() + return (const z_crc_t FAR *)crc_table; + } + ++#ifdef __aarch64__ ++ulg crc32_neon(crc, buf, len) ++ unsigned long crc; ++ const unsigned char FAR *buf; ++ z_size_t len; ++{ ++ register uint32_t crc_result = 0xFFFFFFFFU; ++ register const uint8_t *buf1; ++ register const uint16_t *buf2; ++ register const uint32_t *buf4; ++ register const uint64_t *buf8; ++ int64_t length = (int64_t)len; ++ buf8 = (const uint64_t *)(const void *)buf; ++ ++ if (buf == NULL) { ++ crc_result = 0xffffffffL; ++ } else { ++ crc_result = crc^0xffffffffUL; ++ ++ while((length -= sizeof(uint64_t)) >= 0) { ++ crc_result = __crc32d((crc_result), *buf8++); ++ } ++ ++ buf4 = (const uint32_t *)(const void *)buf8; ++ if (length & sizeof(uint32_t)) { ++ crc_result = __crc32w((crc_result), *buf4++); ++ } ++ ++ buf2 = (const uint16_t *)(const void *)buf4; ++ if(length & sizeof(uint16_t)) { ++ crc_result = __crc32h((crc_result), *buf2++); ++ } ++ ++ buf1 = (const uint8_t *)(const void *)buf2; ++ if (length & sizeof(uint8_t)) { ++ crc_result = __crc32b((crc_result), *buf1); ++ } ++ } ++ ++ return (crc_result ^ 0xffffffffL); ++} ++#endif ++ + /* ========================================================================= + * Use ARM machine instructions if available. This will compute the CRC about + * ten times faster than the braided calculation. This code does not check for +@@ -750,6 +794,10 @@ unsigned long ZEXPORT crc32_z(crc, buf, len) + z_size_t last, last2, i; + z_size_t num; + ++ #ifdef __aarch64__ ++ return crc32_neon(crc, buf, len); ++ #endif ++ + /* Return initial CRC, if requested. */ + if (buf == Z_NULL) return 0; + +-- +2.27.0 + |