Skip to content

Commit abcbca4

Browse files
author
Roger Kowalewski
committed
integrate Hyksort into our sortbench
1 parent 5a5f2cb commit abcbca4

File tree

7 files changed

+105
-23
lines changed

7 files changed

+105
-23
lines changed

sort-bench/Makefile

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,10 @@ DASHROOT?=$(HOME)/opt/dash-0.3.0
88
DASHCXX=$(DASHROOT)/bin/dash-mpic++ -dash:verbose
99
DASHNVCC=$(DASHROOT)/bin/dash-nvcc -dash:verbose
1010

11-
DASHCXXFLAGS = -O3 -Iinclude -Iexternal -DNDEBUG
12-
CXXFLAGS = $(DASHCXXFLAGS) -std=c++14
11+
#CXXFLAGS = -O0 -ggdb3 -std=c++14 -Iinclude -Iexternal -DENABLE_LOGGING
12+
CXXFLAGS+= -O3 -Iinclude -Iexternal -DNDEBUG -std=c++14
13+
14+
DASHCXXFLAGS = $(CXXFLAGS)
1315

1416
SIZE ?= $$((500 * (2**20)))
1517

@@ -63,15 +65,24 @@ build/mpi.x: include/mpi/sortbench.h sortbench.cc external/MP-sort/libmpsort-mpi
6365
@mkdir -p build
6466
$(MPICXX) $(CXXFLAGS) -Iexternal -o $@ -DUSE_MPI sortbench.cc external/MP-sort/libmpsort-mpi.a external/MP-sort/libradixsort.a
6567

68+
# NOTE: for USORT we use a k-way Hypercube-Mergesort; its authors recommend setting CFLAGS="-DKWAY=4".
69+
build/usort.x: external/usort/binUtils.o external/usort/parUtils.o sortbench.cc
70+
@mkdir -p build
71+
$(MPICXX) $(CXXFLAGS) $(CFLAGS) -Iexternal -o $@ -fopenmp -DUSE_USORT sortbench.cc external/usort/binUtils.o external/usort/parUtils.o
72+
6673
external/MP-sort/libmpsort-mpi.a:
6774
make -C external/MP-sort libmpsort-mpi.a
6875

6976
external/MP-sort/libradixsort.a:
7077
make -C external/MP-sort libradixsort.a
7178

79+
external/usort/%.o:
80+
make -C external/usort CFLAGS="$(CFLAGS)" $*.o
81+
7282
*.x: sortbench.cc
7383

7484
clean:
7585
rm -f build/*.x
7686
rm -f build/*.nv
7787
make -C external/MP-sort clean
88+
make -C external/usort clean

sort-bench/include/dash/sortbench.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,11 @@ inline void parallel_rand(RandomIt begin, RandomIt end, Gen const g)
3737
begin.pattern().team().barrier();
3838
}
3939

40-
template <typename RandomIt, typename Cmp>
41-
inline void parallel_sort(RandomIt begin, RandomIt end, Cmp cmp)
40+
template <typename Container, typename Cmp>
41+
inline void parallel_sort(Container & c, Cmp cmp)
4242
{
43+
auto begin = c.begin();
44+
auto end = c.end();
4345
assert(!(end < begin));
4446

4547
dash::sort(begin, end);

sort-bench/include/mpi/sortbench.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,14 @@ inline void parallel_rand(RandomIt begin, RandomIt end, Gen const g)
4949
}
5050
}
5151

52-
template <typename RandomIt, typename Cmp>
53-
inline void parallel_sort(RandomIt begin, RandomIt end, Cmp cmp)
52+
template <typename Container, typename Cmp>
53+
inline void parallel_sort(Container & c, Cmp cmp)
5454
{
55+
auto begin = c.begin();
56+
auto end = c.end();
5557
assert(!(end < begin));
5658

57-
using value_t = typename std::iterator_traits<RandomIt>::value_type;
59+
using value_t = typename Container::value_type;
5860

5961
auto const mysize = static_cast<size_t>(std::distance(begin, end));
6062

@@ -81,7 +83,7 @@ inline bool parallel_verify(RandomIt begin, RandomIt end, Cmp cmp)
8183
auto const n = static_cast<size_t>(std::distance(begin, end));
8284
size_t nerror = 0;
8385

84-
for (size_t idx = 0; idx < n; ++idx) {
86+
for (size_t idx = 1; idx < n; ++idx) {
8587
auto it = begin + idx;
8688
if (cmp(*it, *(it - 1))) {
8789
LOG("Failed sort order: {prev: " << *(it - 1) << ", cur: " << *it

sort-bench/include/openmp/sortbench.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,11 @@ inline void parallel_rand(RandomIt begin, RandomIt end, Gen const g)
3333
}
3434
}
3535

36-
template <typename RandomIt, typename Compare>
37-
inline void parallel_sort(RandomIt begin, RandomIt end, Compare cmp)
36+
template <typename Container, typename Cmp>
37+
inline void parallel_sort(Container & c, Cmp cmp)
3838
{
39+
auto begin = c.begin();
40+
auto end = c.end();
3941
if (rand() & 0x100) {
4042
#pragma omp parallel
4143
#pragma omp master

sort-bench/include/tbb/sortbench.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,11 @@ inline void parallel_rand(RandomIt begin, RandomIt end, Gen const g)
3737
});
3838
}
3939

40-
template <typename RandomIt, typename Compare>
41-
inline void parallel_sort(RandomIt begin, RandomIt end, Compare cmp)
40+
template <typename Container, typename Cmp>
41+
inline void parallel_sort(Container & c, Cmp cmp)
4242
{
43+
auto begin = c.begin();
44+
auto end = c.end();
4345
pss::parallel_stable_sort(begin, end, cmp);
4446
}
4547

sort-bench/include/usort/sortbench.h

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#include <mpi.h>
2+
#include <cassert>
3+
#include <iterator>
4+
#include <random>
5+
6+
#include <usort/include/binUtils.h>
7+
#include <usort/include/ompUtils.h>
8+
#include <usort/include/parUtils.h>
9+
10+
#include <util/Logging.h>
11+
12+
template <typename RandomIt, typename Gen>
13+
inline void parallel_rand(RandomIt begin, RandomIt end, Gen const g)
14+
{
15+
assert(!(end < begin));
16+
17+
auto const n = static_cast<size_t>(std::distance(begin, end));
18+
19+
int ThisTask;
20+
MPI_Comm_rank(MPI_COMM_WORLD, &ThisTask);
21+
22+
std::seed_seq seed{std::random_device{}(), static_cast<unsigned>(ThisTask)};
23+
std::mt19937 rng(seed);
24+
25+
for (size_t idx = 0; idx < n; ++idx) {
26+
auto it = begin + idx;
27+
*it = g(n, idx, rng);
28+
}
29+
}
30+
31+
template <typename Container, typename Cmp>
32+
inline void parallel_sort(Container & c, Cmp cmp)
33+
{
34+
auto begin = c.begin();
35+
auto end = c.end();
36+
assert(!(end < begin));
37+
38+
using value_t = typename Container::value_type;
39+
40+
auto const mysize = static_cast<size_t>(std::distance(begin, end));
41+
42+
::par::HyperQuickSort_kway(c, MPI_COMM_WORLD);
43+
}
44+
45+
/**
 * Checks that the range [begin, end) is ordered with respect to `cmp`.
 *
 * Each element from the second one onward is compared with its predecessor;
 * a pair counts as a violation when cmp(current, previous) holds. Every
 * violation is reported through LOG and the scan always runs to the end of
 * the range. Only the range passed in is examined.
 *
 * @param begin  random-access iterator to the first element
 * @param end    iterator one past the last element; must not precede begin
 * @param cmp    ordering predicate, invoked as cmp(current, previous)
 * @return true when no violation was found (also for ranges with fewer than
 *         two elements), false otherwise
 */
template <typename RandomIt, typename Cmp>
inline bool parallel_verify(RandomIt begin, RandomIt end, Cmp cmp)
{
  assert(!(end < begin));

  auto const count = static_cast<size_t>(std::distance(begin, end));

  size_t violations = 0;
  size_t pos        = 1;
  while (pos < count) {
    auto cur  = begin + pos;
    auto prev = cur - 1;
    if (cmp(*cur, *prev)) {
      LOG("Failed sort order: {prev: " << *prev << ", cur: " << *cur
          << "}");
      ++violations;
    }
    ++pos;
  }

  return violations == 0;
}

sort-bench/sortbench.cc

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
#include <libdash.h>
1616
#elif defined(USE_MPI)
1717
#include <mpi/sortbench.h>
18+
#elif defined(USE_USORT)
19+
#include <usort/sortbench.h>
1820
#endif
1921

2022
#include <intel/IndexedValue.h>
@@ -53,7 +55,7 @@ void print_header(std::string const& app, double mb, int P)
5355
std::cout << std::setw(20) << "NTasks: " << P << "\n";
5456
std::cout << std::setw(20) << "Size: " << std::fixed << std::setprecision(2)
5557
<< mb << "\n";
56-
#if defined(USE_DASH) || defined(USE_MPI)
58+
#if defined(USE_DASH) || defined(USE_MPI) || defined(USE_USORT)
5759
std::cout << std::setw(20) << "Size per Unit (MB): " << std::fixed
5860
<< std::setprecision(2) << mb / P;
5961
#endif
@@ -74,8 +76,6 @@ void Test(Container & c, size_t N, int r, size_t P,std::string const& test_case)
7476
LOG("N :" << N);
7577

7678
using key_t = typename Container::value_type;
77-
auto begin = c.begin();
78-
auto end = c.end();
7979

8080
auto const mb = N * sizeof(key_t) / MB;
8181

@@ -113,7 +113,7 @@ void Test(Container & c, size_t N, int r, size_t P,std::string const& test_case)
113113

114114
for (size_t iter = 0; iter < NITER + BURN_IN; ++iter) {
115115
parallel_rand(
116-
begin, end, [](size_t total, size_t index, std::mt19937& rng) {
116+
c.begin(), c.end(), [](size_t total, size_t index, std::mt19937& rng) {
117117
// return index;
118118
// return total - index;
119119
return dist(rng);
@@ -134,11 +134,11 @@ void Test(Container & c, size_t N, int r, size_t P,std::string const& test_case)
134134

135135
auto const start = ChronoClockNow();
136136

137-
parallel_sort(begin, end, std::less<key_t>());
137+
parallel_sort(c, std::less<key_t>());
138138

139139
auto const duration = ChronoClockNow() - start;
140140

141-
auto const ret = parallel_verify(begin, end, std::less<key_t>());
141+
auto const ret = parallel_verify(c.begin(), c.end(), std::less<key_t>());
142142

143143
if (!ret) {
144144
std::cerr << "validation failed! (n = " << N << ")\n";
@@ -163,7 +163,7 @@ void Test(Container & c, size_t N, int r, size_t P,std::string const& test_case)
163163
}
164164

165165
#ifdef USE_DASH
166-
begin.pattern().team().barrier();
166+
c.begin().pattern().team().barrier();
167167
if (iter == (NITER + BURN_IN - 1) &&
168168
// if the id of this task is included in samples
169169
(std::find(
@@ -181,11 +181,11 @@ void Test(Container & c, size_t N, int r, size_t P,std::string const& test_case)
181181

182182
int main(int argc, char* argv[])
183183
{
184-
using key_t = int32_t;
184+
using key_t = double;
185185

186186
if (argc < 2) {
187187
std::cout << std::string(argv[0])
188-
#if defined(USE_DASH) || defined(USE_MPI)
188+
#if defined(USE_DASH) || defined(USE_MPI) || defined(USE_USORT)
189189
<< " [nbytes per rank]\n";
190190
#else
191191
<< " [nbytes]\n";
@@ -205,7 +205,7 @@ int main(int argc, char* argv[])
205205
auto const gsize_bytes = mysize * P;
206206
auto const N = nl * P;
207207
auto const r = dash::myid();
208-
#elif defined(USE_MPI)
208+
#elif defined(USE_MPI) || defined(USE_USORT)
209209
MPI_Init(&argc, &argv);
210210
int P;
211211
MPI_Comm_size(MPI_COMM_WORLD, &P);
@@ -256,7 +256,7 @@ int main(int argc, char* argv[])
256256

257257
#if defined(USE_DASH)
258258
dash::finalize();
259-
#elif defined(USE_MPI)
259+
#elif defined(USE_MPI) || defined(USE_USORT)
260260
MPI_Finalize();
261261
#endif
262262

0 commit comments

Comments
 (0)