Skip to content

Commit 6fa425e

Browse files
committed
- added tuning and graphics to cmake
- replaced RDTSC timers with a more standard one - made gnuplot generated plots available via tune_it.sh - moved timing points in tune.c to avoid elimination by optimization
1 parent 5809141 commit 6fa425e

19 files changed

+385
-129
lines changed

.gitignore

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ tuning_list
8282
etc/tune
8383
2kprime.1
8484
drprimes.txt
85+
etc/multiplying*
86+
etc/squaring*
8587

8688
# ignore stuff generated by "make manual" and "make poster"
8789
*.aux
@@ -134,3 +136,20 @@ build*/
134136
# kdevelop section
135137
.kdev4/
136138
*.kdev4
139+
140+
# ignore cmake files
141+
CMakeFiles
142+
Makefile
143+
cmake_install.cmake
144+
145+
146+
147+
148+
149+
150+
151+
152+
153+
154+
155+

CMakeLists.txt

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ include(sources.cmake)
3232
# Options
3333
#-----------------------------------------------------------------------------
3434
option(BUILD_SHARED_LIBS "Build shared library and only the shared library if \"ON\", default is static" OFF)
35-
35+
option(BUILD_TUNING "Run a tuning program for the fast multiplication/squaring algorithms if \"ON\"" OFF)
36+
option(BUILD_GRAPHS "Run a benchmark of the fast multiplication/squaring algorithms and make graphics if \"ON\"" OFF)
3637
#-----------------------------------------------------------------------------
3738
# Compose CFLAGS
3839
#-----------------------------------------------------------------------------
@@ -137,6 +138,14 @@ if(BUILD_TESTING)
137138
add_subdirectory(demo)
138139
endif()
139140

141+
#-----------------------------------------------------------------------------
142+
# tuning and benchmark targets
143+
#-----------------------------------------------------------------------------
144+
145+
if(BUILD_TUNING OR BUILD_GRAPHS)
146+
add_subdirectory(etc ${CMAKE_CURRENT_SOURCE_DIR}/etc)
147+
endif()
148+
140149
#-----------------------------------------------------------------------------
141150
# Install/export targets and files
142151
#-----------------------------------------------------------------------------

demo/timing.c

Lines changed: 41 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -55,42 +55,35 @@ static unsigned int lbit(void)
5555
}
5656
}
5757

58-
/* RDTSC from Scott Duplichan */
59-
static uint64_t TIMFUNC(void)
60-
{
61-
#if defined __GNUC__
62-
#if defined(__i386__) || defined(__x86_64__)
63-
/* version from http://www.mcs.anl.gov/~kazutomo/rdtsc.html
64-
* the old code always got a warning issued by gcc, clang did not complain...
65-
*/
66-
unsigned hi, lo;
67-
__asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
68-
return ((uint64_t)lo)|(((uint64_t)hi)<<32);
69-
#else /* gcc-IA64 version */
70-
unsigned long result;
71-
__asm__ __volatile__("mov %0=ar.itc":"=r"(result)::"memory");
72-
73-
while (__builtin_expect((int) result == -1, 0))
74-
__asm__ __volatile__("mov %0=ar.itc":"=r"(result)::"memory");
75-
76-
return result;
77-
#endif
7858

79-
/* Microsoft and Intel Windows compilers */
80-
#elif defined _M_IX86
81-
__asm rdtsc
82-
#elif defined _M_AMD64
83-
return __rdtsc();
84-
#elif defined _M_IA64
85-
#if defined __INTEL_COMPILER
86-
#include <ia64intrin.h>
59+
#if defined(_WIN32)
60+
# include <windows.h>
8761
#endif
88-
return __getReg(3116);
62+
63+
static uint64_t TIMFUNC(void)
64+
{
65+
#if _POSIX_C_SOURCE >= 199309L
66+
#define LTM_BILLION 1000000000
67+
struct timespec ts;
68+
69+
/* TODO: Sets errno in case of error. Use? */
70+
clock_gettime(CLOCK_MONOTONIC, &ts);
71+
return (((uint64_t)ts.tv_sec) * LTM_BILLION + (uint64_t)ts.tv_nsec);
72+
#elif defined(_WIN32)
73+
LARGE_INTEGER ticks;
74+
QueryPerformanceCounter(&ticks);
75+
return (uint64_t)ticks.QuadPart;
8976
#else
90-
#error need rdtsc function for this build
77+
clock_t t;
78+
t = clock();
79+
if (t < (clock_t)(0)) {
80+
return (uint64_t)(0);
81+
}
82+
return (uint64_t)(t);
9183
#endif
9284
}
9385

86+
9487
#define DO2(x) do { mp_err err = x; err = x; (void)err; }while(0)
9588
#define DO4(x) DO2(x); DO2(x)
9689
#define DO8(x) DO4(x); DO4(x)
@@ -141,6 +134,12 @@ int main(int argc, char **argv)
141134
int n, cnt, ix, old_kara_m, old_kara_s, old_toom_m, old_toom_s;
142135
unsigned rr;
143136

137+
#ifdef _WIN32
138+
LARGE_INTEGER Frequency;
139+
#else
140+
struct timespec ts;
141+
#endif
142+
144143
CHECK_OK(mp_init(&a));
145144
CHECK_OK(mp_init(&b));
146145
CHECK_OK(mp_init(&c));
@@ -150,10 +149,21 @@ int main(int argc, char **argv)
150149

151150
srand(LTM_TIMING_RAND_SEED);
152151

153-
152+
#ifdef _WIN32
153+
QueryPerformanceFrequency(&Frequency);
154+
CLK_PER_SEC = (uint64) Frequency;
155+
#elif _POSIX_C_SOURCE >= 199309L
156+
/* returns -1 for an error and 0 for okay, sets errno (not used here) */
157+
if (clock_getres(CLOCK_MONOTONIC, &ts)) {
158+
fprintf(stderr, "%d, clock_getres failed\n", __LINE__);
159+
exit(EXIT_FAILURE);
160+
}
161+
/* TIMFUNC() reports nanoseconds in this branch, so one second is always
 * LTM_BILLION ticks, independent of the clock resolution returned above
 * (this also avoids a division by zero when ts.tv_nsec is 0). */
CLK_PER_SEC = LTM_BILLION;
162+
#else
154163
CLK_PER_SEC = TIMFUNC();
155164
sleep(1);
156165
CLK_PER_SEC = TIMFUNC() - CLK_PER_SEC;
166+
#endif
157167

158168
printf("CLK_PER_SEC == %" PRIu64 "\n", CLK_PER_SEC);
159169

doc/bn.tex

Lines changed: 73 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ \section{License}
102102

103103
\section{Building LibTomMath}
104104

105+
105106
LibTomMath is meant to be very ``GCC friendly'' as it comes with a makefile well suited for GCC.
106107
However, the library will also build in MSVC, Borland C out of the box. For any other ISO C
107108
compiler a makefile will have to be made by the end
@@ -270,6 +271,53 @@ \subsection{Testing}
270271
test was invoked. If an error is detected the program will exit with a dump of the relevant
271272
numbers it was working with.
272273

274+
\subsection{CMake}
275+
Some of the options above are also available with CMake.
276+
277+
\subsubsection{Shared Library}
278+
The default is a static library. To produce a shared library use the CMake option
279+
\begin{alltt}
280+
-DBUILD_SHARED_LIBS=ON
281+
\end{alltt}
282+
283+
\subsubsection{Testing}
284+
To run the testsuite use option
285+
\begin{alltt}
286+
-DBUILD_TESTING=ON
287+
\end{alltt}
288+
289+
\subsubsection{Tuning}
290+
To run the tuning itself use option
291+
\begin{alltt}
292+
-DBUILD_TUNING=ON
293+
\end{alltt}
294+
295+
To run a benchmark with the tuned library and print plots of the benchmark tables use option
296+
\begin{alltt}
297+
-DBUILD_GRAPHS=ON
298+
\end{alltt}
299+
300+
To compile with LTO (Link Time Optimization) use option
301+
\begin{alltt}
302+
-DCOMPILE_LTO=ON
303+
\end{alltt}
304+
305+
There are several build types available:
306+
\begin{description}
307+
\item[Debug] Build a library with debugging symbols (\texttt{-g3}) and no extra optimization
308+
\item[Release] Build the normal release version (\texttt{-O3 -funroll-loops -fomit-frame-pointer}) (default)
309+
\item[RelWithDebInfo] Build a library with debugging symbols (\texttt{-g3 -O2}) and a bit of optimization
310+
\item[MinSizeRel] Build a small sized library (\texttt{-Os})
311+
\end{description}
312+
The build types are case-sensitive!
313+
314+
Choose one with:
315+
\begin{alltt}
316+
-DCMAKE_BUILD_TYPE=buildtype
317+
\end{alltt}
318+
319+
320+
273321
\section{Build Configuration}
274322
LibTomMath can be configured at build time in two phases we shall call ``depends'' and
275323
``trims''. Each phase changes how the library is built and they are applied one after another
@@ -1600,13 +1648,35 @@ \section{Tuning Polynomial Basis Routines}
16001648
make tune
16011649
\end{alltt}
16021650

1603-
This will run a benchmark, computes the medians, rewrites \texttt{bncore.c}, and recompiles
1604-
\texttt{bncore.c} and relinks the library.
1651+
With CMake
1652+
\begin{alltt}
1653+
cmake -DBUILD_TUNING=ON /path/to/source/dir
cmake --build .
1654+
\end{alltt}
1655+
1656+
1657+
This will run a benchmark, compute the medians, rewrite \texttt{tommath\_cutoffs.h}, recompile
1658+
\texttt{cutoffs.c}, and relink the library.
16051659

16061660
The benchmark itself can be fine--tuned in the file \texttt{etc/tune\_it.sh}.
16071661

16081662
The program \texttt{etc/tune} is also able to print a list of values for printing curves with e.g.:
1609-
\texttt{gnuplot}. type \texttt{./etc/tune -h} to get a list of all available options.
1663+
\texttt{gnuplot}. Type \texttt{./etc/tune -h} to get a list of all the available options. There
1664+
are a lot.
1665+
1666+
To get some nice plots in \texttt{etc} try
1667+
1668+
\begin{alltt}
1669+
make graphs
1670+
\end{alltt}
1671+
1672+
With CMake
1673+
\begin{alltt}
1674+
cmake -DBUILD_GRAPHS=ON /path/to/source/dir
cmake --build .
1675+
\end{alltt}
1676+
1677+
This will run a benchmark, compute the medians, rewrite \texttt{tommath\_cutoffs.h}, recompile
1678+
\texttt{cutoffs.c}, relink the library and run gnuplot to print plots in the PNG format. The size
1679+
of the images is fixed in the file \texttt{etc/plot\_graphs.gp} and has to be changed manually.
16101680

16111681
\chapter{Modular Reduction}
16121682

etc/CMakeLists.txt

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# SPDX-License-Identifier: Unlicense
2+
#
3+
# LibTomMath, a free open source portable number theoretic multiple-precision
4+
# integer (MPI) library written entirely in C.
5+
#
6+
7+
# target_link_options(), used further down in this file, needs CMake >= 3.13
cmake_minimum_required(VERSION 3.13)
8+
9+
set(LTM_TUNE tune-ltm)
10+
11+
# This file can be included from the top level or used stand-alone
12+
if(PROJECT_NAME)
13+
set(LIBRARY_NAME ${PROJECT_NAME})
14+
else()
15+
# Define an independent project and all the necessary stuff around
16+
project(${LTM_TUNE}
17+
LANGUAGES C)
18+
set(LIBRARY_NAME libtommath)
19+
# Stand-alone build: the installed library is mandatory, so fail at configure
# time instead of at link time when it cannot be found.
find_package(${LIBRARY_NAME} REQUIRED)
20+
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
21+
set(CMAKE_BUILD_TYPE "Release")
22+
endif()
23+
endif()
24+
25+
add_executable(tune
26+
${CMAKE_CURRENT_SOURCE_DIR}/tune.c
27+
)
28+
29+
target_include_directories(tune PRIVATE
30+
${CMAKE_CURRENT_SOURCE_DIR}
31+
${CMAKE_CURRENT_SOURCE_DIR}/..
32+
)
33+
34+
target_link_libraries(tune PRIVATE
35+
${LIBRARY_NAME}
36+
)
37+
38+
target_compile_options(tune PRIVATE
39+
${LTM_C_FLAGS}
40+
)
41+
target_link_options(tune BEFORE PUBLIC
42+
${LTM_LD_FLAGS}
43+
)
44+
45+
if(BUILD_GRAPHS)
46+
# used in tune_it.sh
47+
find_program(GNUPLOT gnuplot)
48+
add_custom_command(TARGET tune POST_BUILD COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/tune_it.sh 1000 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} VERBATIM)
49+
else()
50+
add_custom_command(TARGET tune POST_BUILD COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/tune_it.sh WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} VERBATIM)
51+
endif()

etc/makefile

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ LTM_TUNE_CFLAGS = $(CFLAGS) $(LTM_CFLAGS) -Wall -W -Wextra -Wshadow -O3 -I../
66
# libname when you can't install the lib with install
77
LIBNAME=../libtommath.a
88

9-
all: pprime tune test_standalone mersenne drprime 2kprime mont
9+
# the plotting rule in this makefile is named "graphs"; there is no "graph" rule
all: pprime tune test_standalone mersenne drprime 2kprime mont getlimbsize graphs
1010

1111
#provable primes
1212
pprime: pprime.o
@@ -36,10 +36,15 @@ drprime: drprime.o
3636
mont: mont.o
3737
$(CC) $(LTM_TUNE_CFLAGS) mont.o $(LIBNAME) -o mont
3838

39+
# Make pretty pictures (1000 is the maximum number of limbs to print for mul/sqr)
40+
# "tune" runs twice because it runs automatically when built.
41+
graphs: tune
42+
./tune_it.sh 1000
3943

4044
clean:
4145
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime mont 2kprime pprime.dat \
42-
tuning_list multiplying squaring test *.da *.dyn *.dpi *~
46+
tuning_list get_limbsize out *.da *.dyn *.dpi *~ cmake_install.cmake Makefile
4347
rm -rf .libs
48+
rm -rf CMakeFiles
4449

4550
.PHONY: tune

etc/makefile.icc

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -32,20 +32,10 @@ tune: tune.o
3232
$(CC) $(CFLAGS) tune.o $(LIBNAME) -o tune
3333
./tune_it.sh
3434

35-
# same app but using RDTSC for higher precision [requires 80586+], coff based gcc installs [e.g. ming, cygwin, djgpp]
36-
tune86: tune.c
37-
nasm -f coff timer.asm
38-
$(CC) -DX86_TIMER $(CFLAGS) tune.c timer.o $(LIBNAME) -o tune86
39-
40-
# for cygwin
41-
tune86c: tune.c
42-
nasm -f gnuwin32 timer.asm
43-
$(CC) -DX86_TIMER $(CFLAGS) tune.c timer.o $(LIBNAME) -o tune86
44-
45-
#make tune86 for linux or any ELF format
46-
tune86l: tune.c
47-
nasm -f elf -DUSE_ELF timer.asm
48-
$(CC) -DX86_TIMER $(CFLAGS) tune.c timer.o $(LIBNAME) -o tune86l
35+
# Make pretty pictures (1000 is the maximum number of limbs to print for mul/sqr)
36+
# "tune" runs twice because it runs automatically when built.
37+
graphs: tune
38+
./tune_it.sh 1000
4939

5040
# spits out mersenne primes
5141
mersenne: mersenne.o
@@ -64,4 +54,4 @@ mont: mont.o
6454

6555

6656
clean:
67-
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime tune86 tune86l mont 2kprime pprime.dat *.il tuning_list
57+
rm -f *.log *.o *.obj *.exe pprime tune mersenne drprime mont 2kprime pprime.dat get_limbsize *.il tuning_list

etc/plot_graphs.gp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
set term pngcairo size 720,540
2+
# Color sequence that remains distinguishable with most forms of color blindness
3+
set colorsequence podo
4+
5+
set key top left;
6+
7+
set ylabel "Time"
8+
set xlabel "Operand size (limbs)"
9+
10+
set output "multiplying".ARG1.".png";
11+
set title "Comparing fast and slow multiplying [".ARG1." bits limbsize]";
12+
plot "multiplying".ARG1."" using 1:2 w lines t "slow", "multiplying".ARG1."" using 1:3 w lines t "fast"
13+
14+
set output "squaring".ARG1.".png";
15+
set title "Comparing fast and slow squaring [".ARG1." bits limbsize]";
16+
plot "squaring".ARG1."" using 1:2 w lines t "slow", "squaring".ARG1."" using 1:3 w lines t "fast"
17+
18+
19+

0 commit comments

Comments
 (0)