diff options
| author | CoprDistGit <infra@openeuler.org> | 2023-08-19 12:42:12 +0000 | 
|---|---|---|
| committer | CoprDistGit <infra@openeuler.org> | 2023-08-19 12:42:12 +0000 | 
| commit | 4171d506f218bcfaa1c8d40b36596abfab644899 (patch) | |
| tree | 0e62eb269ac2671efc121d54075ddd016bfe4db3 /zlib-Optimize-CRC32.patch | |
| parent | ec1a16522fc9c54ca26742f73b94eab08a38f25d (diff) | |
automatic import of zlib
Diffstat (limited to 'zlib-Optimize-CRC32.patch')
| -rw-r--r-- | zlib-Optimize-CRC32.patch | 94 | 
1 files changed, 94 insertions, 0 deletions
```diff
diff --git a/zlib-Optimize-CRC32.patch b/zlib-Optimize-CRC32.patch
new file mode 100644
index 0000000..c0495a6
--- /dev/null
+++ b/zlib-Optimize-CRC32.patch
@@ -0,0 +1,94 @@
+From 8935175266e343ac1d52106e2e790810b54f26c1 Mon Sep 17 00:00:00 2001
+From: liqiang64 <liqiang64@huawei.com>
+Date: Tue, 3 Dec 2019 03:22:00 +0000
+Subject: [PATCH] zlib: Optimize CRC32
+
+This patch uses the NEON instruction set to optimize the CRC32
+algorithm.
+
+On the ARM architecture, we can optimize the efficiency of
+crc32 through the interface provided by the neon instruction
+set.
+Modify by Li Qiang.
+---
+ crc32.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 50 insertions(+)
+
+diff --git a/crc32.c b/crc32.c
+index f8357b0..5c53068 100644
+--- a/crc32.c
++++ b/crc32.c
+@@ -28,6 +28,9 @@
+ #endif /* MAKECRCH */
+ 
+ #include "zutil.h"      /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
++#ifdef __aarch64__
++#include "arm_acle.h"
++#endif
+ 
+  /*
+   A CRC of a message is computed on N braids of words in the message, where
+@@ -600,6 +603,49 @@ const z_crc_t FAR * ZEXPORT get_crc_table()
+     return (const z_crc_t FAR *)crc_table;
+ }
+ 
++#ifdef __aarch64__
++ulg crc32_neon(crc, buf, len)
++    unsigned long crc;
++    const unsigned char FAR *buf;
++    z_size_t len;
++{
++    register uint32_t crc_result = 0xFFFFFFFFU;
++    register const uint8_t  *buf1;
++    register const uint16_t *buf2;
++    register const uint32_t *buf4;
++    register const uint64_t *buf8;
++    int64_t length = (int64_t)len;
++    buf8 = (const  uint64_t *)(const void *)buf;
++
++    if (buf == NULL) {
++        crc_result = 0xffffffffL;
++    } else {
++        crc_result = crc^0xffffffffUL;
++
++        while((length -= sizeof(uint64_t)) >= 0) {
++            crc_result = __crc32d((crc_result), *buf8++);
++        }
++
++        buf4 = (const uint32_t *)(const void *)buf8;
++        if (length & sizeof(uint32_t)) {
++            crc_result = __crc32w((crc_result), *buf4++);
++        }
++
++        buf2 = (const uint16_t *)(const void *)buf4;
++        if(length & sizeof(uint16_t)) {
++            crc_result = __crc32h((crc_result), *buf2++);
++        }
++
++        buf1 = (const uint8_t *)(const void *)buf2;
++        if (length & sizeof(uint8_t)) {
++            crc_result = __crc32b((crc_result), *buf1);
++        }
++    }
++
++    return (crc_result ^ 0xffffffffL);
++}
++#endif
++
+ /* =========================================================================
+  * Use ARM machine instructions if available. This will compute the CRC about
+  * ten times faster than the braided calculation. This code does not check for
+@@ -750,6 +794,10 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
+     z_size_t last, last2, i;
+     z_size_t num;
+ 
++    #ifdef __aarch64__
++    return crc32_neon(crc, buf, len);
++    #endif
++
+     /* Return initial CRC, if requested. */
+     if (buf == Z_NULL) return 0;
+ 
+-- 
+2.27.0
+
```
