Diffstat (limited to 'zlib-Optimize-CRC32.patch')
-rw-r--r--  zlib-Optimize-CRC32.patch  94
1 file changed, 94 insertions, 0 deletions
diff --git a/zlib-Optimize-CRC32.patch b/zlib-Optimize-CRC32.patch
new file mode 100644
index 0000000..c0495a6
--- /dev/null
+++ b/zlib-Optimize-CRC32.patch
@@ -0,0 +1,94 @@
+From 8935175266e343ac1d52106e2e790810b54f26c1 Mon Sep 17 00:00:00 2001
+From: liqiang64 <liqiang64@huawei.com>
+Date: Tue, 3 Dec 2019 03:22:00 +0000
+Subject: [PATCH] zlib: Optimize CRC32
+
+This patch optimizes the CRC32 computation on AArch64 by using the
+hardware CRC32 instructions exposed as intrinsics in <arm_acle.h>.
+
+On ARMv8 processors with the CRC32 extension, these instructions
+process 1, 2, 4 or 8 bytes of input per step, which is considerably
+faster than the table-driven software calculation.
+Modified by Li Qiang.
+---
+ crc32.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 50 insertions(+)
+
+diff --git a/crc32.c b/crc32.c
+index f8357b0..5c53068 100644
+--- a/crc32.c
++++ b/crc32.c
+@@ -28,6 +28,9 @@
+ #endif /* MAKECRCH */
+
+ #include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
++#ifdef __aarch64__
++#include <arm_acle.h>
++#endif
+
+ /*
+   A CRC of a message is computed on N braids of words in the message, where
+@@ -600,6 +603,49 @@ const z_crc_t FAR * ZEXPORT get_crc_table()
+     return (const z_crc_t FAR *)crc_table;
+ }
+
++#ifdef __aarch64__
++ulg crc32_neon(crc, buf, len)
++    unsigned long crc;
++    const unsigned char FAR *buf;
++    z_size_t len;
++{
++    register uint32_t crc_result = 0xffffffffUL;
++    register const uint8_t *buf1;
++    register const uint16_t *buf2;
++    register const uint32_t *buf4;
++    register const uint64_t *buf8;
++    int64_t length = (int64_t)len;
++    buf8 = (const uint64_t *)(const void *)buf;
++
++    if (buf == NULL) {
++        crc_result = 0xffffffffUL;            /* initial CRC requested */
++    } else {
++        crc_result = crc ^ 0xffffffffUL;      /* pre-condition the CRC */
++
++        while ((length -= sizeof(uint64_t)) >= 0) {
++            crc_result = __crc32d(crc_result, *buf8++);  /* eight bytes per step */
++        }
++
++        buf4 = (const uint32_t *)(const void *)buf8;     /* 0..7 bytes remain */
++        if (length & sizeof(uint32_t)) {
++            crc_result = __crc32w(crc_result, *buf4++);
++        }
++
++        buf2 = (const uint16_t *)(const void *)buf4;
++        if (length & sizeof(uint16_t)) {
++            crc_result = __crc32h(crc_result, *buf2++);
++        }
++
++        buf1 = (const uint8_t *)(const void *)buf2;
++        if (length & sizeof(uint8_t)) {
++            crc_result = __crc32b(crc_result, *buf1);
++        }
++    }
++
++    return (crc_result ^ 0xffffffffUL);       /* post-condition the CRC */
++}
++#endif
++
+ /* =========================================================================
+  * Use ARM machine instructions if available. This will compute the CRC about
+  * ten times faster than the braided calculation. This code does not check for
+@@ -750,6 +794,10 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
+     z_size_t last, last2, i;
+     z_size_t num;
+
++#ifdef __aarch64__
++    return crc32_neon(crc, buf, len);
++#endif
++
+
+     /* Return initial CRC, if requested. */
+     if (buf == Z_NULL) return 0;
+
+--
+2.27.0
+
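
A quick way to exercise the patched path is through zlib's public crc32() API: with this patch applied on an AArch64 build, crc32() (via crc32_z()) returns early through crc32_neon(), so existing callers need no source changes. The sketch below is not part of the patch; the file name test_crc32.c and the build line are illustrative only, and it assumes the library was built from the patched sources.

    /* test_crc32.c - minimal check of the CRC32 path (illustrative, not part of the patch). */
    #include <stdio.h>
    #include <string.h>
    #include <zlib.h>

    int main(void)
    {
        const unsigned char data[] = "The quick brown fox jumps over the lazy dog";
        unsigned long crc = crc32(0L, Z_NULL, 0);      /* fetch the initial CRC */

        crc = crc32(crc, data, (uInt)strlen((const char *)data));
        printf("crc32 = 0x%08lx\n", crc);              /* expected: 0x414fa339 */
        return (crc == 0x414fa339UL) ? 0 : 1;
    }

Built against the patched library with something like "cc -O2 test_crc32.c -lz", the reported value should match the known CRC-32 of the test string whether the hardware or the table-driven path is taken. One design note: the patch gates crc32_neon() only on __aarch64__ rather than __ARM_FEATURE_CRC32, so depending on the toolchain, crc32.c may need to be compiled with the CRC extension enabled (for example -march=armv8-a+crc) for the <arm_acle.h> intrinsics to be available.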