Skip to content

Commit

Permalink
[OGS] Fix swapped stride arguments in calls to occaScatterKernel(). (#59)
Browse files Browse the repository at this point in the history

* [OGS] Fix swapped stride arguments in calls to occaScatterKernel().

Found this while debugging wrong output from ogs_t::GatherScatterMany() when
running with multiple MPI processes.  With this fix in place, it seems to work
properly now.

* [gslib] Forward the gslib sort fix. Thanks @nnnunnn!

Co-authored-by: Noel Chalmers <noel.chalmers@gmail.com>
  • Loading branch information
aaustin141 and noelchalmers authored Dec 2, 2020
1 parent 277f0a4 commit ea41e03
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 16 deletions.
27 changes: 14 additions & 13 deletions 3rdParty/gslib/src/sort_imp.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ typedef struct { T v; uint i; } sort_data;
#define INDEX_PTR(A,stride,i) (*(T*)((char*)(A)+(i)*(stride)))

/*------------------------------------------------------------------------------
Radix Sort
stable; O(n+k) time and extra storage
where k = (digits in an int) * 2^(bits per digit)
(e.g. k = 4 * 256 = 1024 for 32-bit ints with 8-bit digits)
Expand All @@ -42,7 +42,7 @@ typedef struct { T v; uint i; } sort_data;
counting sort is used for each digit:
a pass through the input counts the occurrences of each digit value
on a second pass, each input has a known destination
tricks:
all counting passes are combined into one
the counting pass also computes the inclusive bit-wise or of all inputs,
Expand Down Expand Up @@ -103,7 +103,7 @@ static void radix_offsets(uint *restrict c)
uint *const ce = c+DIGIT_VALUES;
uint sum = 0;
do {
const uint c0=c[0], c1=c[1], c2=c[2], c3=c[3];
const uint c0=c[0], c1=c[1], c2=c[2], c3=c[3];
const uint o1=sum+c0, o2=o1+c1, o3=o2+c2;
c[0]=sum, c[1]=o1, c[2]=o2, c[3]=o3;
sum = o3+c3;
Expand Down Expand Up @@ -256,9 +256,9 @@ static void radix_sortp(
}

/*------------------------------------------------------------------------------
Merge Sort
stable; O(n log n) time
----------------------------------------------------------------------------*/
Expand All @@ -278,7 +278,8 @@ static void radix_sortp(
} while(0)
#define MERGE_SORT() \
do { \
uint i=0, n=An, base=-n, odd=0, c=0, b=1; \
uint i=0, n=An, odd=0, c=0, b=1; \
sint base=-n; \
for(;;) { \
DATA *restrict p; \
if((c&1)==0) { \
Expand Down Expand Up @@ -386,9 +387,9 @@ static void merge_sortp(
#undef MERGE_2

/*------------------------------------------------------------------------------
Heap Sort
in-place, stability unobservable; O(n log n) time
----------------------------------------------------------------------------*/
Expand Down Expand Up @@ -420,14 +421,14 @@ static void heap_sortv(T *const restrict A, unsigned n)


/*------------------------------------------------------------------------------
Hybrid Stable Sort
low-overhead merge sort when n is small,
otherwise asymptotically superior radix sort
result = O(n) sort with good performance for all n
A, n, stride : specifies the input, stride in bytes
out : the sorted values on output
Expand Down Expand Up @@ -504,7 +505,7 @@ uint *sortp(buffer *restrict buf, int start_perm,
work = (sort_data*)((char*)buf->ptr+work_off);
count = (uint(*)[DIGIT_VALUES])((char*)buf->ptr+count_off);
radix_sortp(perm,start_perm, A,n,stride, work,count);
}
}
return perm;
}

Expand Down
6 changes: 3 additions & 3 deletions libs/ogs/occaGatherScatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,10 @@ void occaGatherScatterFinish(occa::memory& o_v,

// scatter back to local nodes
if (trans == ogs_trans)
occaScatterKernel(ogs.haloGather, Nentries, Nvectors, stride, ogs.Nhalo,
occaScatterKernel(ogs.haloGather, Nentries, Nvectors, ogs.Nhalo, stride,
type, op, ogs.o_haloBuf, o_v);
else
occaScatterKernel(ogs.haloScatter, Nentries, Nvectors, stride, ogs.Nhalo,
occaScatterKernel(ogs.haloScatter, Nentries, Nvectors, ogs.Nhalo, stride,
type, op, ogs.o_haloBuf, o_v);
}
}
Expand Down Expand Up @@ -173,4 +173,4 @@ void occaGatherScatterKernel(const ogsData_t &gather,
#undef WITH_OP
}

} //namespace ogs
} //namespace ogs

0 comments on commit ea41e03

Please sign in to comment.