18
18
19
19
#include < memory>
20
20
#include < vector>
21
+ #include < array>
21
22
#include < limits>
22
23
#include < cmath>
23
24
@@ -249,6 +250,24 @@ class ElementAccessor {
249
250
};
250
251
251
252
253
/* Finiteness test that is safe to call for any element type.
 *
 * Floating-point types (float, double, long double) are forwarded to
 * std::isfinite, so infinities and NaNs are reported as non-finite.
 * Every other type has no non-finite values by definition and always yields true.
 *
 * The argument is taken by const reference on purpose: with a non-const `T&`
 * parameter a `const double` argument deduces T as `const double`, which does not
 * match a `double` specialization and silently falls back to the "always finite"
 * generic version. A const reference also accepts temporaries.
 */
template <typename T>
inline typename std::enable_if<std::is_floating_point<T>::value, bool>::type
isfinite(const T& t) {
    return std::isfinite(t);
}

/* Non-floating-point overload: such values are always considered finite. */
template <typename T>
inline typename std::enable_if<!std::is_floating_point<T>::value, bool>::type
isfinite(const T&) {
    return true;
}
268
+
269
+
270
+
252
271
template <typename T, typename TElementAccessor>
253
272
class Statistics {
254
273
/* Finds finite min and max values in the array
@@ -263,7 +282,7 @@ class Statistics {
263
282
sorted = true ;
264
283
265
284
if _PLACEMENT_SORT_CONSTEXPR (std::numeric_limits<T>::has_infinity) {
266
- while (!std:: isfinite (prev_value) && first_finite_i < size) {
285
+ while (!isfinite (prev_value) && first_finite_i < size) {
267
286
++first_finite_i;
268
287
const T value = array.get_value (first_finite_i);
269
288
if (value < prev_value)
@@ -281,7 +300,7 @@ class Statistics {
281
300
sorted = false ;
282
301
prev_value = value;
283
302
if _PLACEMENT_SORT_CONSTEXPR (std::numeric_limits<T>::has_infinity)
284
- if (!std:: isfinite (value))
303
+ if (!isfinite (value))
285
304
continue ;
286
305
if (value < min)
287
306
min = value;
@@ -340,10 +359,10 @@ class PlaceCalculator<T, TStatistics, typename std::enable_if<std::is_floating_p
340
359
PlaceCalculator (const TStatistics& statistics, size_t size) : min(statistics.get_min()), last_index(size - 1 ) {
341
360
T max = statistics.get_max ();
342
361
invariant = ((long double )size - 1 .) / (max - min);
343
- if (!std:: isfinite (invariant) || invariant == 0 .) {
362
+ if (!isfinite (invariant) || invariant == 0 .) {
344
363
split = true ;
345
364
split_value = (0.5 * max + 0.5 * min);
346
- if (!std:: isfinite (split_value))
365
+ if (!isfinite (split_value))
347
366
split_value = max;
348
367
}
349
368
}
@@ -373,7 +392,7 @@ class PlaceCalculator<T, TStatistics, typename std::enable_if<std::is_floating_p
373
392
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
374
393
template <typename T>
375
394
static inline void prefetch (T p) {
376
- _mm_prefetch (p, _MM_HINT_NTA);
395
+ _mm_prefetch (( char *) p, _MM_HINT_NTA);
377
396
}
378
397
#else
379
398
template <typename T>
@@ -429,20 +448,22 @@ template <typename TElementAccessor, typename TPlaceCalculator, typename counter
429
448
static inline void move_elements_in_place (TElementAccessor& array, const TPlaceCalculator& placer, counters_t & counters) {
430
449
const size_t size = array.get_count ();
431
450
constexpr typename counters_t ::value_type topBit = (typename counters_t ::value_type)1 << (sizeof (typename counters_t ::value_type)*8 - 1 );
432
- const size_t block_size = (size > 512 *1024 ) ? 32 : 4 ;
451
+ constexpr size_t block_size_high = 32 ;
452
+ constexpr size_t block_size_low = 4 ;
453
+ const size_t block_size = (size > 512 *1024 ) ? block_size_high : block_size_low;
433
454
434
455
/* This algorithm moves elements to their places and sorts out collisions with no extra memory except already available counters.
435
456
* It uses the highest bit of each counter to mark elements that have already been moved to their destination place.
436
457
* This approach looks ugly, but it is the fastest because it saves memory traffic. */
437
458
438
459
for (size_t sorted = 0 ; sorted < size; ) {
439
- size_t places[block_size ];
460
+ size_t places[block_size_high ];
440
461
const size_t block_end = sorted + std::min (block_size, size - sorted);
441
462
for (size_t i = sorted; i < block_end; ++i) { // prefetch counters
442
463
if (!(topBit & counters[i])) {
443
464
const size_t place = placer.get_place (array.get_value (i));
444
465
places[i - sorted] = place;
445
- _mm_prefetch (&counters[place], _MM_HINT_NTA );
466
+ prefetch (&counters[place]);
446
467
}
447
468
}
448
469
for (size_t i = sorted; i < block_end; ++i) { // move
@@ -707,7 +728,7 @@ void placement_sort(TElementAccessor& array) {
707
728
}
708
729
709
730
/* TODO:
710
- * MSVS C++11 support
731
+ * Fix MSVS low performance
711
732
* port tests
712
733
* topBit hider functors
713
734
* fix (36, initFexpGrowth<float> non buff case
0 commit comments