From cabb23e44089ea00c9d2161289c86e6b7cbe2ef5 Mon Sep 17 00:00:00 2001 From: Austin Donisan Date: Fri, 9 Feb 2024 07:21:42 +0000 Subject: [PATCH] reduce result hash size to 2^14 to reduce unecessary pages --- 1brc.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/1brc.c b/1brc.c index c8ef67e..53f7cd2 100644 --- a/1brc.c +++ b/1brc.c @@ -50,6 +50,7 @@ #define HASH_SHIFT 17 // 16 is 1% faster for non-10k, 17 is 10% faster for 10k #define HASH_LONG_SHIFT 14 // 14 is requried to fit 10k entries +#define HASH_RESULT_SHIFT 14 // wrapping and fitting nicely in pages is better than extra buffer at the end #define HASH_ENTRIES (1 << HASH_SHIFT) @@ -212,8 +213,9 @@ void print256(__m256i var); #define HASH_CITY_OFFSET 5 // log2(SHORT_CITY_LENGTH) #define HASH_CITY_LONG_OFFSET 7 // log2(LONG_CITY_LENGTH) -#define HASH_SHORT_MASK (((1 << HASH_SHIFT ) - 1) << MIN(HASH_DATA_OFFSET, HASH_CITY_OFFSET)) -#define HASH_LONG_MASK (((1 << HASH_LONG_SHIFT) - 1) << HASH_CITY_LONG_OFFSET) +#define HASH_SHORT_MASK (((1 << HASH_SHIFT ) - 1) << MIN(HASH_DATA_OFFSET, HASH_CITY_OFFSET)) +#define HASH_LONG_MASK (((1 << HASH_LONG_SHIFT ) - 1) << HASH_CITY_LONG_OFFSET) +#define HASH_RESULT_MASK (((1 << HASH_RESULT_SHIFT) - 1) << HASH_CITY_OFFSET) #define HASH_DATA_SHIFT (HASH_DATA_OFFSET - MIN(HASH_DATA_OFFSET, HASH_CITY_OFFSET)) #define HASH_CITY_SHIFT (HASH_CITY_OFFSET - MIN(HASH_DATA_OFFSET, HASH_CITY_OFFSET)) @@ -400,6 +402,7 @@ void merge(Results * restrict dst, Results * restrict src) { hashValue = hash_city(row.city.reg); } + hashValue = (hashValue >> (HASH_SHIFT - HASH_RESULT_SHIFT)) & HASH_RESULT_MASK; while (1) { ResultsRow *dstRow = dst->rows + (hashValue / SHORT_CITY_LENGTH); __m256i xor = _mm256_xor_si256(dstRow->city.reg, row.city.reg); @@ -413,12 +416,13 @@ void merge(Results * restrict dst, Results * restrict src) { if (_mm256_testz_si256(dstRow->city.reg, dstRow->city.reg)) { dst->refs[dst->numCities] = (ResultsRef){hashValue}; - dst->rows[hashValue / SHORT_CITY_LENGTH] = row; + dst->rows[hashValue / SHORT_CITY_LENGTH] = row; dst->numCities++; break; } + hashValue += SHORT_CITY_LENGTH; - hashValue &= HASH_SHORT_MASK; + hashValue &= HASH_RESULT_MASK; } } } @@ -551,7 +555,6 @@ void convert_hash_to_results(hash_t * restrict hash, Results * restrict out) { max = MAX(max, rows[i].max); } - out->refs[i] = (ResultsRef) {offset}; if (unlikely(city_is_long(city))) { LongCity *longCity = hash->p.hashedCitiesLong + city.longRef.index; out->longCities[out->numLongCities] = *longCity; @@ -560,7 +563,19 @@ void convert_hash_to_results(hash_t * restrict hash, Results * restrict out) { out->numLongCities++; } - out->rows[offset / SHORT_CITY_LENGTH] = (ResultsRow) {city, sum, count, min, max}; + offset = (offset >> (HASH_SHIFT - HASH_RESULT_SHIFT)) & HASH_RESULT_MASK; + if (offset / SHORT_CITY_LENGTH * SHORT_CITY_LENGTH != offset) { + fprintf(stderr, "nooo: %d\n", offset); + } + while (1) { + if (_mm256_testz_si256(out->rows[offset / SHORT_CITY_LENGTH].city.reg, out->rows[offset / SHORT_CITY_LENGTH].city.reg)) { + out->rows[offset / SHORT_CITY_LENGTH] = (ResultsRow) {city, sum, count, min, max}; + break; + } + offset += SHORT_CITY_LENGTH; + offset &= HASH_RESULT_MASK; + } + out->refs[i] = (ResultsRef) {offset}; } }