Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add optimized crc32 for Power 8+ processors #478

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
*.gcno
*.gcov

/crc32_test
/crc32_test64
/crc32_testsh
/example
/example64
/examplesh
Expand Down
72 changes: 72 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ set(VERSION "1.2.12.1")

# Optional platform-specific implementations. No default value is given to
# option(), so each of these is OFF unless the user turns it on.
option(ASM686 "Enable building i686 assembly implementation")
option(AMD64 "Enable building amd64 assembly implementation")
# POWER gates the if(POWER) section further down, which probes the compiler
# and adds the sources under contrib/power.
option(POWER "Enable building power implementation")

set(INSTALL_BIN_DIR "${CMAKE_INSTALL_PREFIX}/bin" CACHE PATH "Installation directory for executables")
set(INSTALL_LIB_DIR "${CMAKE_INSTALL_PREFIX}/lib" CACHE PATH "Installation directory for libraries")
Expand Down Expand Up @@ -140,6 +141,73 @@ if(CMAKE_COMPILER_IS_GNUCC)
add_definitions(-DASMV)
set_source_files_properties(${ZLIB_ASMS} PROPERTIES LANGUAGE C COMPILE_FLAGS -DNO_UNDERLINE)
endif()

# Probe for GNU indirect functions: the snippet below only compiles when the
# toolchain accepts __attribute__((ifunc(...))), which is what the optimized
# code paths use to pick an implementation at runtime.
# The outcome is stored in HAS_C_ATTR_IFUNC.
check_c_source_compiles("
static int test_ifunc_native(void)
{
return 1;
}
static int (*(check_ifunc_native(void)))(void)
{
return test_ifunc_native;
}
int test_ifunc(void) __attribute__ ((ifunc (\"check_ifunc_native\")));
int main(void)
{
return 0;
}
" HAS_C_ATTR_IFUNC)

if(HAS_C_ATTR_IFUNC)
  # Track the ifunc helper header alongside the other private headers and
  # tell the sources that the attribute is usable.
  list(APPEND ZLIB_PRIVATE_HDRS contrib/gcc/zifunc.h)
  add_definitions(-DHAVE_IFUNC)
endif()

if(POWER)
  # Test to see if we can use the optimizations for Power: the compiler must
  # target PowerPC (_ARCH_PPC) and advertise __builtin_cpu_supports() through
  # the __BUILTIN_CPU_SUPPORTS__ macro.
  # NOTE(review): the configure script additionally rejects non-64-bit
  # targets; confirm whether the same restriction is wanted here.
  check_c_source_compiles("
#ifndef _ARCH_PPC
#error \"Target is not Power\"
#endif
#ifndef __BUILTIN_CPU_SUPPORTS__
#error \"Target doesn't support __builtin_cpu_supports()\"
#endif
int main() { return 0; }
" HAS_POWER_SUPPORT)

  # The optimized routines are selected through ifunc, so both probes must
  # have succeeded.
  if(HAS_POWER_SUPPORT AND HAS_C_ATTR_IFUNC)
    add_definitions(-DZ_POWER_OPT)

    # CMAKE_REQUIRED_FLAGS is read by every check_* macro. Remember the
    # caller's value so the -mcpu probes below do not leak into checks that
    # run later in this directory.
    set(ZLIB_POWER_SAVED_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")

    # Does the compiler accept -mcpu=power8?
    set(CMAKE_REQUIRED_FLAGS -mcpu=power8)
    check_c_source_compiles("int main(void){return 0;}" POWER8)

    if(POWER8)
      add_definitions(-DZ_POWER8)
      set(ZLIB_POWER8
        contrib/power/crc32_z_power8.c)

      # Only the Power8-specific sources are built with -mcpu=power8; the
      # rest of the library keeps the default target flags.
      set_source_files_properties(
        ${ZLIB_POWER8}
        PROPERTIES COMPILE_FLAGS -mcpu=power8)
    endif()

    # Does the compiler accept -mcpu=power9?
    set(CMAKE_REQUIRED_FLAGS -mcpu=power9)
    check_c_source_compiles("int main(void){return 0;}" POWER9)

    if(POWER9)
      add_definitions(-DZ_POWER9)
      # No Power9-specific sources exist yet; the list is kept as the place
      # to add them.
      set(ZLIB_POWER9 )

      # Skip the property call while the list is empty.
      if(ZLIB_POWER9)
        set_source_files_properties(
          ${ZLIB_POWER9}
          PROPERTIES COMPILE_FLAGS -mcpu=power9)
      endif()
    endif()

    # Put back whatever the caller had before the -mcpu probes.
    set(CMAKE_REQUIRED_FLAGS "${ZLIB_POWER_SAVED_REQUIRED_FLAGS}")

    set(ZLIB_PRIVATE_HDRS ${ZLIB_PRIVATE_HDRS} contrib/power/power.h)
    set(ZLIB_SRCS ${ZLIB_SRCS} ${ZLIB_POWER8} ${ZLIB_POWER9})
  endif()
endif()
endif()

if(MSVC)
Expand Down Expand Up @@ -234,6 +302,10 @@ add_executable(example test/example.c)
target_link_libraries(example zlib)
add_test(example example)

# crc32_test: test program built from test/crc32_test.c, linked against the
# zlib target and registered with CTest under the same name.
add_executable(crc32_test test/crc32_test.c)
target_link_libraries(crc32_test zlib)
add_test(crc32_test crc32_test)

add_executable(minigzip test/minigzip.c)
target_link_libraries(minigzip zlib)

Expand Down
43 changes: 33 additions & 10 deletions Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -75,19 +75,19 @@ PIC_OBJS = $(PIC_OBJC) $(PIC_OBJA)

all: static shared

static: example$(EXE) minigzip$(EXE)
static: crc32_test$(EXE) example$(EXE) minigzip$(EXE)

shared: examplesh$(EXE) minigzipsh$(EXE)
shared: crc32_testsh$(EXE) examplesh$(EXE) minigzipsh$(EXE)

all64: example64$(EXE) minigzip64$(EXE)
all64: crc32_test64$(EXE) example64$(EXE) minigzip64$(EXE)

check: test

test: all teststatic testshared

teststatic: static
@TMPST=tmpst_$$; \
if echo hello world | ./minigzip | ./minigzip -d && ./example $$TMPST ; then \
if echo hello world | ./minigzip | ./minigzip -d && ./example $$TMPST && ./crc32_test; then \
echo ' *** zlib test OK ***'; \
else \
echo ' *** zlib test FAILED ***'; false; \
Expand All @@ -100,7 +100,7 @@ testshared: shared
DYLD_LIBRARY_PATH=`pwd`:$(DYLD_LIBRARY_PATH) ; export DYLD_LIBRARY_PATH; \
SHLIB_PATH=`pwd`:$(SHLIB_PATH) ; export SHLIB_PATH; \
TMPSH=tmpsh_$$; \
if echo hello world | ./minigzipsh | ./minigzipsh -d && ./examplesh $$TMPSH; then \
if echo hello world | ./minigzipsh | ./minigzipsh -d && ./examplesh $$TMPSH && ./crc32_testsh; then \
echo ' *** zlib shared test OK ***'; \
else \
echo ' *** zlib shared test FAILED ***'; false; \
Expand All @@ -109,7 +109,7 @@ testshared: shared

test64: all64
@TMP64=tmp64_$$; \
if echo hello world | ./minigzip64 | ./minigzip64 -d && ./example64 $$TMP64; then \
if echo hello world | ./minigzip64 | ./minigzip64 -d && ./example64 $$TMP64 && ./crc32_test64; then \
echo ' *** zlib 64-bit test OK ***'; \
else \
echo ' *** zlib 64-bit test FAILED ***'; false; \
Expand Down Expand Up @@ -143,12 +143,18 @@ match.lo: match.S
mv _match.o match.lo
rm -f _match.s

# Objects for the test programs (crc32_test, example, minigzip). Each is
# compiled from its source under $(SRCDIR)test/ with $(CFLAGS) $(ZINCOUT).
crc32_test.o: $(SRCDIR)test/crc32_test.c $(SRCDIR)zlib.h zconf.h
$(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/crc32_test.c

example.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h
$(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/example.c

minigzip.o: $(SRCDIR)test/minigzip.c $(SRCDIR)zlib.h zconf.h
$(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/minigzip.c

# The *64.o variants build the same sources with -D_FILE_OFFSET_BITS=64.
crc32_test64.o: $(SRCDIR)test/crc32_test.c $(SRCDIR)zlib.h zconf.h
$(CC) $(CFLAGS) $(ZINCOUT) -D_FILE_OFFSET_BITS=64 -c -o $@ $(SRCDIR)test/crc32_test.c

example64.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h
$(CC) $(CFLAGS) $(ZINCOUT) -D_FILE_OFFSET_BITS=64 -c -o $@ $(SRCDIR)test/example.c

Expand All @@ -162,6 +168,9 @@ adler32.o: $(SRCDIR)adler32.c
# Static-library objects, compiled with $(CFLAGS) $(ZINC).
crc32.o: $(SRCDIR)crc32.c
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c

# The Power8 CRC32 source lives under contrib/power and is the only object
# here compiled with -mcpu=power8 added to the flags.
crc32_z_power8.o: $(SRCDIR)contrib/power/crc32_z_power8.c
$(CC) $(CFLAGS) -mcpu=power8 $(ZINC) -c -o $@ $(SRCDIR)contrib/power/crc32_z_power8.c

deflate.o: $(SRCDIR)deflate.c
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c

Expand Down Expand Up @@ -212,6 +221,11 @@ crc32.lo: $(SRCDIR)crc32.c
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c
-@mv objs/crc32.o $@

# PIC objects (.lo): make sure the objs/ scratch directory exists, compile
# there with $(SFLAGS) -DPIC, then move the result to the .lo target.
# The leading "-" lets make continue if mkdir or mv fails; "@" hides the
# command from the output.
# The Power8 CRC32 source additionally gets -mcpu=power8.
crc32_z_power8.lo: $(SRCDIR)contrib/power/crc32_z_power8.c
-@mkdir objs 2>/dev/null || test -d objs
$(CC) $(SFLAGS) -mcpu=power8 $(ZINC) -DPIC -c -o objs/crc32_z_power8.o $(SRCDIR)contrib/power/crc32_z_power8.c
-@mv objs/crc32_z_power8.o $@

deflate.lo: $(SRCDIR)deflate.c
-@mkdir objs 2>/dev/null || test -d objs
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c
Expand Down Expand Up @@ -285,18 +299,27 @@ placebo $(SHAREDLIBV): $(PIC_OBJS) libz.a
ln -s $@ $(SHAREDLIBM)
-@rmdir objs

# Test programs that depend on $(STATICLIB) and are linked with
# $(TEST_LDFLAGS).
crc32_test$(EXE): crc32_test.o $(STATICLIB)
$(CC) $(CFLAGS) -o $@ crc32_test.o $(TEST_LDFLAGS)

example$(EXE): example.o $(STATICLIB)
$(CC) $(CFLAGS) -o $@ example.o $(TEST_LDFLAGS)

minigzip$(EXE): minigzip.o $(STATICLIB)
$(CC) $(CFLAGS) -o $@ minigzip.o $(TEST_LDFLAGS)

# "sh" variants: the same objects linked against $(SHAREDLIBV), searched for
# in the current directory (-L.).
crc32_testsh$(EXE): crc32_test.o $(SHAREDLIBV)
$(CC) $(CFLAGS) -o $@ crc32_test.o -L. $(SHAREDLIBV)

examplesh$(EXE): example.o $(SHAREDLIBV)
$(CC) $(CFLAGS) -o $@ example.o -L. $(SHAREDLIBV)

minigzipsh$(EXE): minigzip.o $(SHAREDLIBV)
$(CC) $(CFLAGS) -o $@ minigzip.o -L. $(SHAREDLIBV)

# "64" variants: built from the *64.o objects, depending on $(STATICLIB)
# and linked with $(TEST_LDFLAGS).
crc32_test64$(EXE): crc32_test64.o $(STATICLIB)
$(CC) $(CFLAGS) -o $@ crc32_test64.o $(TEST_LDFLAGS)

example64$(EXE): example64.o $(STATICLIB)
$(CC) $(CFLAGS) -o $@ example64.o $(TEST_LDFLAGS)

Expand Down Expand Up @@ -366,8 +389,8 @@ zconf: $(SRCDIR)zconf.h.in
mostlyclean: clean
clean:
rm -f *.o *.lo *~ \
example$(EXE) minigzip$(EXE) examplesh$(EXE) minigzipsh$(EXE) \
example64$(EXE) minigzip64$(EXE) \
crc32_test$(EXE) example$(EXE) minigzip$(EXE) crc32_testsh$(EXE) examplesh$(EXE) minigzipsh$(EXE) \
crc32_test64$(EXE) example64$(EXE) minigzip64$(EXE) \
infcover \
libz.* foo.gz so_locations \
_match.s maketree contrib/infback9/*.o
Expand All @@ -389,7 +412,7 @@ tags:

adler32.o zutil.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
gzclose.o gzlib.o gzread.o gzwrite.o: $(SRCDIR)zlib.h zconf.h $(SRCDIR)gzguts.h
compress.o example.o minigzip.o uncompr.o: $(SRCDIR)zlib.h zconf.h
compress.o crc32_test.o example.o minigzip.o uncompr.o: $(SRCDIR)zlib.h zconf.h
crc32.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)crc32.h
deflate.o: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
infback.o inflate.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h $(SRCDIR)inflate.h $(SRCDIR)inffast.h $(SRCDIR)inffixed.h
Expand All @@ -399,7 +422,7 @@ trees.o: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)tr

adler32.lo zutil.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
gzclose.lo gzlib.lo gzread.lo gzwrite.lo: $(SRCDIR)zlib.h zconf.h $(SRCDIR)gzguts.h
compress.lo example.lo minigzip.lo uncompr.lo: $(SRCDIR)zlib.h zconf.h
compress.lo crc32_test.lo example.lo minigzip.lo uncompr.lo: $(SRCDIR)zlib.h zconf.h
crc32.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)crc32.h
deflate.lo: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
infback.lo inflate.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h $(SRCDIR)inflate.h $(SRCDIR)inffast.h $(SRCDIR)inffixed.h
Expand Down
69 changes: 69 additions & 0 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -836,6 +836,75 @@ EOF
fi
fi

# Check whether the compiler accepts the GNU indirect function attribute
# (ifunc), used to detect and load optimized code at runtime. On success
# -DHAVE_IFUNC is appended to both SFLAGS and CFLAGS.
echo >> configure.log
cat > $test.c <<EOF
static int test_ifunc_native(void)
{
return 1;
}

static int (*(check_ifunc_native(void)))(void)
{
return test_ifunc_native;
}

int test_ifunc(void) __attribute__ ((ifunc ("check_ifunc_native")));
EOF

# Work out the answer first, then report it with a single message.
if tryboth $CC -c $CFLAGS $test.c; then
  SFLAGS="${SFLAGS} -DHAVE_IFUNC"
  CFLAGS="${CFLAGS} -DHAVE_IFUNC"
  ifunc_answer="Yes"
else
  ifunc_answer="No"
fi
echo "Checking for attribute(ifunc) support... ${ifunc_answer}." | tee -a configure.log

# Test to see if we can use the optimizations for Power.
# The probe file consists of preprocessor checks only: the target must be
# PowerPC, 64-bit, have HAVE_IFUNC defined (it reaches the compiler through
# $CFLAGS) and advertise __builtin_cpu_supports().
echo >> configure.log
cat > $test.c <<EOF
#ifndef _ARCH_PPC
#error "Target is not Power"
#endif
#if !(defined(__PPC64__) || defined(__powerpc64__))
#error "Target is not 64 bits"
#endif
#ifndef HAVE_IFUNC
#error "Target doesn't support ifunc"
#endif
#ifndef __BUILTIN_CPU_SUPPORTS__
#error "Target doesn't support __builtin_cpu_supports()"
#endif
EOF

# Start from a known state: both variables are expanded into SFLAGS/CFLAGS
# below even when the matching -mcpu probe fails, so a value inherited from
# the environment must not leak through.
POWER8=
POWER9=

if tryboth $CC -c $CFLAGS $test.c; then
  # Reuse the probe file as a trivial program for the -mcpu checks.
  echo "int main(void){return 0;}" > $test.c

  if tryboth $CC -c $CFLAGS -mcpu=power8 $test.c; then
    POWER8="-DZ_POWER8"
    # The Power8 CRC32 object goes into both the shared and static lists.
    PIC_OBJC="${PIC_OBJC} crc32_z_power8.lo"
    OBJC="${OBJC} crc32_z_power8.o"
    echo "Checking for -mcpu=power8 support... Yes." | tee -a configure.log
  else
    echo "Checking for -mcpu=power8 support... No." | tee -a configure.log
  fi

  if tryboth $CC -c $CFLAGS -mcpu=power9 $test.c; then
    POWER9="-DZ_POWER9"
    # No Power9-specific objects exist yet, so PIC_OBJC and OBJC are left
    # untouched.
    echo "Checking for -mcpu=power9 support... Yes." | tee -a configure.log
  else
    echo "Checking for -mcpu=power9 support... No." | tee -a configure.log
  fi

  SFLAGS="${SFLAGS} ${POWER8} ${POWER9} -DZ_POWER_OPT"
  CFLAGS="${CFLAGS} ${POWER8} ${POWER9} -DZ_POWER_OPT"
  echo "Checking for Power optimizations support... Yes." | tee -a configure.log
else
  echo "Checking for Power optimizations support... No." | tee -a configure.log
fi

# show the results in the log
echo >> configure.log
echo ALL = $ALL >> configure.log
Expand Down
9 changes: 9 additions & 0 deletions contrib/README.contrib
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ ada/ by Dmitriy Anisimkov <anisimkov@yahoo.com>
blast/ by Mark Adler <madler@alumni.caltech.edu>
Decompressor for output of PKWare Data Compression Library (DCL)

gcc/ by Matheus Castanho <msc@linux.ibm.com>
and Rogerio Alves <rcardoso@linux.ibm.com>
Optimization helpers using GCC-specific extensions

delphi/ by Cosmin Truta <cosmint@cs.ubbcluj.ro>
Support for Delphi and C++ Builder

Expand Down Expand Up @@ -42,6 +46,11 @@ minizip/ by Gilles Vollant <info@winimage.com>
pascal/ by Bob Dellaca <bobdl@xtra.co.nz> et al.
Support for Pascal

power/ by Daniel Black <daniel@linux.ibm.com>
Matheus Castanho <msc@linux.ibm.com>
and Rogerio Alves <rcardoso@linux.ibm.com>
Optimized functions for Power processors

puff/ by Mark Adler <madler@alumni.caltech.edu>
Small, low memory usage inflate. Also serves to provide an
unambiguous description of the deflate format.
Expand Down
Loading