Skip to content

Commit

Permalink
check finish while we have starts vector loaded
Browse files (browse the repository at this point in the history)
  • Loading branch information
austindonisan committed Feb 2, 2024
1 parent 50d8765 commit d3b4bab
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions 1brc.c
Original file line number | Diff line number | Diff line change
Expand Up @@ -621,19 +621,22 @@ void process_chunk(const char * const restrict base, const unsigned int * offset

alignas(64) long nums[STRIDE];
alignas(32) unsigned int starts[STRIDE];
bool checkFinished;

__m256i starts_v = _mm256_loadu_si256((__m256i *)offsets);
__m256i ends_v = _mm256_loadu_si256((__m256i *)(offsets + 1));
__m256i finished_v = _mm256_set1_epi32(0);

__m256i atEndMask = _mm256_cmpeq_epi32(starts_v, ends_v);
checkFinished = !_mm256_testz_si256(atEndMask, atEndMask);

_mm256_store_si256((__m256i *)starts, starts_v);

insert_city(hash, hash_city(_mm256_loadu_si256((__m256i *)masked_dummy)), 0, _mm256_loadu_si256((__m256i *)masked_dummy));

while(1) {
__m256i at_end_mask = _mm256_cmpeq_epi32(starts_v, ends_v);
if (unlikely(!_mm256_testz_si256(at_end_mask, at_end_mask))) {
finished_v = _mm256_or_si256(finished_v, at_end_mask);
if (unlikely(checkFinished)) {
finished_v = _mm256_or_si256(finished_v, atEndMask);

if (unlikely(_mm256_movemask_epi8(finished_v) == 0xFFFFFFFF)) {
return;
Expand Down Expand Up @@ -785,6 +788,9 @@ void process_chunk(const char * const restrict base, const unsigned int * offset
starts_v = _mm256_sub_epi32(starts_v, newline_mask_shift);
_mm256_store_si256((__m256i *)(starts), starts_v);

atEndMask = _mm256_cmpeq_epi32(starts_v, ends_v);
checkFinished = !_mm256_testz_si256(atEndMask, atEndMask);

mulled = _mm256_slli_epi32(mulled, 14);
mulled = _mm256_srli_epi32(mulled, 22);
__m256i final = _mm256_sign_epi32(mulled, minus_mask);
Expand Down

0 comments on commit d3b4bab

Please sign in to comment.