forked from zlib-ng/zlib-ng
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add optimized crc32 for POWER8 and later processors
This commit adds an optimized version of the crc32 function based on crc32-vpmsum from https://github.com/antonblanchard/crc32-vpmsum/ . The code has been relicensed to the zlib license. This is the C implementation created by Rogerio Alves <rogealve@br.ibm.com>. It makes use of vector instructions to speed up the CRC32 algorithm. Decompression times improved by +30% in tests. Based on Daniel Black's work for the original zlib (madler/zlib#478).
- Loading branch information
1 parent
d87e6d9
commit ea2541c
Showing
7 changed files
with
1,996 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
/* Helper functions to work around issues with clang builtins | ||
* Copyright (C) 2021 IBM Corporation | ||
* | ||
* Authors: | ||
* Daniel Black <daniel@linux.vnet.ibm.com> | ||
* Rogerio Alves <rogealve@br.ibm.com> | ||
* Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> | ||
* | ||
* For conditions of distribution and use, see copyright notice in zlib.h | ||
*/ | ||
|
||
#ifndef CLANG_WORKAROUNDS_H | ||
#define CLANG_WORKAROUNDS_H | ||
|
||
/* | ||
* These stubs fix clang incompatibilities with GCC builtins. | ||
*/ | ||
|
||
/*
 * Alias the vpmsumw and vpmsumd builtin names to __builtin_crypto_vpmsumb,
 * unless a macro with the same name is already defined (#ifndef only sees
 * macros, not compiler builtins).
 * NOTE(review): this presumably relies on Clang providing
 * __builtin_crypto_vpmsumb as an overloaded function that accepts the wider
 * element types -- confirm against the Clang versions being targeted.
 */
#ifndef __builtin_crypto_vpmsumw
#define __builtin_crypto_vpmsumw __builtin_crypto_vpmsumb
#endif
#ifndef __builtin_crypto_vpmsumd
#define __builtin_crypto_vpmsumd __builtin_crypto_vpmsumb
#endif
|
||
static inline | ||
__vector unsigned long long __attribute__((overloadable)) | ||
vec_ld(int __a, const __vector unsigned long long* __b) | ||
{ | ||
return (__vector unsigned long long)__builtin_altivec_lvx(__a, __b); | ||
} | ||
|
||
/* | ||
* GCC __builtin_pack_vector_int128 returns a vector __int128_t but Clang | ||
* does not recognize this type. On GCC this builtin is translated to a | ||
* xxpermdi instruction that only moves the registers __a, __b instead generates | ||
* a load. | ||
* | ||
* Clang has vec_xxpermdi intrinsics. It was implemented in 4.0.0. | ||
*/ | ||
static inline | ||
__vector unsigned long long __builtin_pack_vector (unsigned long __a, | ||
unsigned long __b) | ||
{ | ||
#if defined(__BIG_ENDIAN__) | ||
__vector unsigned long long __v = {__a, __b}; | ||
#else | ||
__vector unsigned long long __v = {__b, __a}; | ||
#endif | ||
return __v; | ||
} | ||
|
||
/* | ||
* Clang 7 changed the behavior of vec_xxpermdi in order to provide the same | ||
* behavior of GCC. That means code adapted to Clang >= 7 does not work on | ||
* Clang <= 6. So, fallback to __builtin_unpack_vector() on Clang <= 6. | ||
*/ | ||
#if !defined vec_xxpermdi || __clang_major__ <= 6 | ||
|
||
static inline | ||
unsigned long __builtin_unpack_vector (__vector unsigned long long __v, | ||
int __o) | ||
{ | ||
return __v[__o]; | ||
} | ||
|
||
/*
 * Endian-aware wrappers around __builtin_unpack_vector():
 * __builtin_unpack_vector_0 reads element 0 on big-endian targets and
 * element 1 otherwise; __builtin_unpack_vector_1 reads the other element.
 */
#if defined(__BIG_ENDIAN__)
#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 0)
#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 1)
#else
#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 1)
#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 0)
#endif
|
||
#else | ||
|
||
static inline | ||
unsigned long __builtin_unpack_vector_0 (__vector unsigned long long __v) | ||
{ | ||
#if defined(__BIG_ENDIAN__) | ||
return vec_xxpermdi(__v, __v, 0x0)[0]; | ||
#else | ||
return vec_xxpermdi(__v, __v, 0x3)[0]; | ||
#endif | ||
} | ||
|
||
static inline | ||
unsigned long __builtin_unpack_vector_1 (__vector unsigned long long __v) | ||
{ | ||
#if defined(__BIG_ENDIAN__) | ||
return vec_xxpermdi(__v, __v, 0x3)[0]; | ||
#else | ||
return vec_xxpermdi(__v, __v, 0x0)[0]; | ||
#endif | ||
} | ||
#endif /* vec_xxpermdi */ | ||
|
||
#endif |
Oops, something went wrong.