Skip to content

Commit fb6bd14

Browse files
committed
Implement support for Intel crc32 instruction (SSE 4.2)
1 parent 77948e7 commit fb6bd14

File tree

2 files changed

+96
-12
lines changed

2 files changed

+96
-12
lines changed

build_detect_platform

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -71,75 +71,81 @@ if [ "$CXX" = "g++" ]; then
7171
MEMCMP_FLAG="-fno-builtin-memcmp"
7272
fi
7373

74+
# SSE_FLAG holds the compiler option that enables the SSE 4.2 intrinsics
# (hardware crc32 instruction).  It stays empty when they cannot be used.
SSE_FLAG=
if [ "$CXX" = "g++" ]; then
    # Only request SSE 4.2 when this g++ can actually target it.  On non-x86
    # targets the option is rejected outright, so adding it unconditionally
    # would break every compile.  Probe with a syntax-only compile of a tiny
    # program that pulls in the intrinsics header.
    if printf '#include <nmmintrin.h>\nint main() { return 0; }\n' | \
        "$CXX" -msse4.2 -fsyntax-only -x c++ - >/dev/null 2>&1; then
        SSE_FLAG="-msse4.2"
    fi
fi
79+
7480
case "$TARGET_OS" in
7581
CYGWIN_*)
7682
PLATFORM=OS_LINUX
77-
COMMON_FLAGS="$MEMCMP_FLAG -lpthread -DOS_LINUX -DCYGWIN"
83+
COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -lpthread -DOS_LINUX -DCYGWIN"
7884
PLATFORM_LDFLAGS="-lpthread"
7985
PORT_FILE=port/port_posix.cc
8086
;;
8187
Darwin)
8288
PLATFORM=OS_MACOSX
83-
COMMON_FLAGS="$MEMCMP_FLAG -DOS_MACOSX"
89+
COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -DOS_MACOSX"
8490
PLATFORM_SHARED_EXT=dylib
8591
[ -z "$INSTALL_PATH" ] && INSTALL_PATH=`pwd`
8692
PLATFORM_SHARED_LDFLAGS="-dynamiclib -install_name $INSTALL_PATH/"
8793
PORT_FILE=port/port_posix.cc
8894
;;
8995
Linux)
9096
PLATFORM=OS_LINUX
91-
COMMON_FLAGS="$MEMCMP_FLAG -pthread -DOS_LINUX"
97+
COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -pthread -DOS_LINUX"
9298
PLATFORM_LDFLAGS="-pthread"
9399
PORT_FILE=port/port_posix.cc
94100
;;
95101
SunOS)
96102
PLATFORM=OS_SOLARIS
97-
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_SOLARIS"
103+
COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -D_REENTRANT -DOS_SOLARIS"
98104
PLATFORM_LIBS="-lpthread -lrt"
99105
PORT_FILE=port/port_posix.cc
100106
;;
101107
FreeBSD)
102108
PLATFORM=OS_FREEBSD
103-
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_FREEBSD"
109+
COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -D_REENTRANT -DOS_FREEBSD"
104110
PLATFORM_LIBS="-lpthread"
105111
PORT_FILE=port/port_posix.cc
106112
;;
107113
NetBSD)
108114
PLATFORM=OS_NETBSD
109-
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_NETBSD"
115+
COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -D_REENTRANT -DOS_NETBSD"
110116
PLATFORM_LIBS="-lpthread -lgcc_s"
111117
PORT_FILE=port/port_posix.cc
112118
;;
113119
OpenBSD)
114120
PLATFORM=OS_OPENBSD
115-
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_OPENBSD"
121+
COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -D_REENTRANT -DOS_OPENBSD"
116122
PLATFORM_LDFLAGS="-pthread"
117123
PORT_FILE=port/port_posix.cc
118124
;;
119125
DragonFly)
120126
PLATFORM=OS_DRAGONFLYBSD
121-
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_DRAGONFLYBSD"
127+
COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -D_REENTRANT -DOS_DRAGONFLYBSD"
122128
PLATFORM_LIBS="-lpthread"
123129
PORT_FILE=port/port_posix.cc
124130
;;
125131
OS_ANDROID_CROSSCOMPILE)
126132
PLATFORM=OS_ANDROID
127-
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_ANDROID -DLEVELDB_PLATFORM_POSIX"
133+
COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -D_REENTRANT -DOS_ANDROID -DLEVELDB_PLATFORM_POSIX"
128134
PLATFORM_LDFLAGS="" # All pthread features are in the Android C library
129135
PORT_FILE=port/port_posix.cc
130136
CROSS_COMPILE=true
131137
;;
132138
HP-UX)
133139
PLATFORM=OS_HPUX
134-
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_HPUX"
140+
COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -D_REENTRANT -DOS_HPUX"
135141
PLATFORM_LDFLAGS="-pthread"
136142
PORT_FILE=port/port_posix.cc
137143
# man ld: +h internal_name
138144
PLATFORM_SHARED_LDFLAGS="-shared -Wl,+h -Wl,"
139145
;;
140146
IOS)
141147
PLATFORM=IOS
142-
COMMON_FLAGS="$MEMCMP_FLAG -DOS_MACOSX"
148+
COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -DOS_MACOSX"
143149
[ -z "$INSTALL_PATH" ] && INSTALL_PATH=`pwd`
144150
PORT_FILE=port/port_posix.cc
145151
PLATFORM_SHARED_EXT=

util/crc32c.cc

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,27 @@
1010
#include <stdint.h>
1111
#include "util/coding.h"
1212

13+
// IsSSE42Available() reports whether the CPU the process is running on
// advertises SSE 4.2 (CPUID leaf 1, ECX bit 20).  Compilers for which no
// CPUID helper is known get a stub that always answers false.
#if defined(_MSC_VER)
#include <intrin.h>
static inline bool IsSSE42Available() {
  // __cpuid fills cpu_info with EAX, EBX, ECX, EDX in that order.
  int cpu_info[4];
  __cpuid(cpu_info, 1);
  return (cpu_info[2] & (1 << 20)) != 0;
}
#elif defined(__GNUC__)
#include <cpuid.h>
#include <nmmintrin.h>
static inline bool IsSSE42Available() {
  unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
  // __get_cpuid() returns 0 and leaves the outputs untouched when the
  // requested leaf is not supported; treat that as "no SSE 4.2" instead of
  // testing an uninitialized register value.
  if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
    return false;
  }
  return (ecx & (1u << 20)) != 0;
}
#else
static inline bool IsSSE42Available() {
  return false;
}
#endif
33+
1334
namespace leveldb {
1435
namespace crc32c {
1536

@@ -283,7 +304,12 @@ static inline uint32_t LE_LOAD32(const uint8_t *p) {
283304
return DecodeFixed32(reinterpret_cast<const char*>(p));
284305
}
285306

286-
uint32_t Extend(uint32_t crc, const char* buf, size_t size) {
307+
// Used to fetch a naturally-aligned 64-bit word in little endian byte-order
308+
static inline uint64_t LE_LOAD64(const uint8_t *p) {
309+
return DecodeFixed64(reinterpret_cast<const char*>(p));
310+
}
311+
312+
static uint32_t Extend_NoSSE(uint32_t crc, const char* buf, size_t size) {
287313
const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
288314
const uint8_t *e = p + size;
289315
uint32_t l = crc ^ 0xffffffffu;
@@ -328,5 +354,57 @@ uint32_t Extend(uint32_t crc, const char* buf, size_t size) {
328354
return l ^ 0xffffffffu;
329355
}
330356

357+
// For further improvements see Intel publication at:
358+
// http://download.intel.com/design/intarch/papers/323405.pdf
359+
static uint32_t Extend_SSE42(uint32_t crc, const char* buf, size_t size) {
360+
const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
361+
const uint8_t *e = p + size;
362+
uint32_t l = crc ^ 0xffffffffu;
363+
364+
#define STEP1 do { \
365+
l = _mm_crc32_u8(l, *p++); \
366+
} while (0)
367+
#define STEP4 do { \
368+
l = _mm_crc32_u32(l, LE_LOAD32(p)); \
369+
p += 4; \
370+
} while (0)
371+
#define STEP8 do { \
372+
l = _mm_crc32_u64(l, LE_LOAD64(p)); \
373+
p += 8; \
374+
} while (0)
375+
376+
if (size > 16) {
377+
// Process unaligned bytes
378+
for (unsigned int i = reinterpret_cast<uintptr_t>(p) % 8; i; --i) {
379+
STEP1;
380+
}
381+
// Process 8 bytes at a time
382+
while ((e-p) >= 8) {
383+
STEP8;
384+
}
385+
// Process 4 bytes at a time
386+
if ((e-p) >= 4) {
387+
STEP4;
388+
}
389+
}
390+
// Process the last few bytes
391+
while (p != e) {
392+
STEP1;
393+
}
394+
#undef STEP4
395+
#undef STEP1
396+
return l ^ 0xffffffffu;
397+
}
398+
399+
uint32_t Extend(uint32_t crc, const char* buf, size_t size) {
400+
static int flag = -1;
401+
if (flag == -1) {
402+
flag = IsSSE42Available() ? 1 : 0;
403+
}
404+
return flag
405+
? Extend_SSE42(crc, buf, size)
406+
: Extend_NoSSE(crc, buf, size);
407+
}
408+
331409
} // namespace crc32c
332410
} // namespace leveldb

0 commit comments

Comments
 (0)