-
Notifications
You must be signed in to change notification settings - Fork 57
Simplifying fallback kernels #303
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1130,8 +1130,8 @@ static inline int countpairs_rp_pi_mocks_fallback_DOUBLE(const int64_t N0, DOUBL | |
const DOUBLE min_xdiff, const DOUBLE min_ydiff, const DOUBLE min_zdiff, | ||
const DOUBLE closest_icell_xpos, const DOUBLE closest_icell_ypos, | ||
const DOUBLE closest_icell_zpos, | ||
DOUBLE *src_rpavg, uint64_t *src_npairs, | ||
DOUBLE *src_weightavg, const weight_method_t weight_method) | ||
DOUBLE *restrict src_rpavg, uint64_t *restrict src_npairs, | ||
DOUBLE *restrict src_weightavg, const weight_method_t weight_method) | ||
{ | ||
if(N0 == 0 || N1 == 0) { | ||
return EXIT_SUCCESS; | ||
|
@@ -1141,39 +1141,16 @@ static inline int countpairs_rp_pi_mocks_fallback_DOUBLE(const int64_t N0, DOUBL | |
return EXIT_FAILURE; | ||
} | ||
|
||
const int32_t need_rpavg = src_rpavg != NULL; | ||
const int32_t need_weightavg = src_weightavg != NULL; | ||
|
||
(void) fast_divide_and_NR_steps;//unused parameter but required to keep the same function signature amongst the kernels | ||
|
||
/*----------------- FALLBACK CODE --------------------*/ | ||
const int64_t totnbins = (npibin+1)*(nbin+1); | ||
const DOUBLE sqr_pimax = pimax*pimax; | ||
const DOUBLE sqr_max_sep = sqr_rpmax + sqr_pimax; | ||
|
||
uint64_t npairs[totnbins]; | ||
DOUBLE rpavg[totnbins], weightavg[totnbins]; | ||
for(int i=0;i<totnbins;i++) { | ||
npairs[i] = 0; | ||
if(need_rpavg) { | ||
rpavg[i]=ZERO; | ||
} | ||
if(need_weightavg){ | ||
weightavg[i]=ZERO; | ||
} | ||
} | ||
|
||
// A copy whose pointers we can advance | ||
weight_struct_DOUBLE local_w0 = {.weights={NULL}, .num_weights=0}, local_w1 = {.weights={NULL}, .num_weights=0}; | ||
pair_struct_DOUBLE pair = {.num_weights=0}; | ||
weight_func_t_DOUBLE weight_func = NULL; | ||
if(need_weightavg){ | ||
// Same particle list, new copy of num_weights pointers into that list | ||
local_w0 = *weights0; | ||
local_w1 = *weights1; | ||
|
||
pair.num_weights = local_w0.num_weights; | ||
|
||
if(src_weightavg != NULL){ | ||
pair.num_weights = weights0->num_weights; | ||
weight_func = get_weight_func_by_method_DOUBLE(weight_method); | ||
} | ||
|
||
|
@@ -1187,7 +1164,7 @@ static inline int countpairs_rp_pi_mocks_fallback_DOUBLE(const int64_t N0, DOUBL | |
const DOUBLE ypos = *y0++; | ||
const DOUBLE zpos = *z0++; | ||
for(int w = 0; w < pair.num_weights; w++){ | ||
pair.weights0[w].d = *local_w0.weights[w]++; | ||
pair.weights0[w].d = *(weights0->weights[w] + i); | ||
} | ||
DOUBLE max_dz = max_all_dz; | ||
|
||
|
@@ -1249,9 +1226,6 @@ static inline int countpairs_rp_pi_mocks_fallback_DOUBLE(const int64_t N0, DOUBL | |
const int64_t n_off = localz1 - zstart; | ||
DOUBLE *localx1 = x1 + n_off; | ||
DOUBLE *localy1 = y1 + n_off; | ||
for(int w = 0; w < local_w1.num_weights; w++){ | ||
local_w1.weights[w] = weights1->weights[w] + n_off; | ||
} | ||
|
||
const int64_t nleft = N1 - n_off; | ||
for(int64_t j=0;j<nleft;j++){ | ||
|
@@ -1263,10 +1237,6 @@ static inline int countpairs_rp_pi_mocks_fallback_DOUBLE(const int64_t N0, DOUBL | |
const DOUBLE perpy = *localy1++ - ypos; | ||
const DOUBLE perpz = *localz1++ - zpos; | ||
|
||
for(int w = 0; w < pair.num_weights; w++){ | ||
pair.weights1[w].d = *local_w1.weights[w]++; | ||
} | ||
|
||
/* particles are sorted on z, all future j particles will have larger value in localz1 -> perpz will be | ||
larger. therefore, if current perpz >= max_dz, all future iterations will also be larger. we can | ||
terminate the loop */ | ||
|
@@ -1288,10 +1258,13 @@ static inline int countpairs_rp_pi_mocks_fallback_DOUBLE(const int64_t N0, DOUBL | |
if(sqr_Dperp >= sqr_rpmax || sqr_Dperp < sqr_rpmin) continue; | ||
|
||
DOUBLE rp = ZERO, pairweight = ZERO; | ||
if(need_rpavg) { | ||
if(src_rpavg != NULL) { | ||
rp = SQRT(sqr_Dperp); | ||
} | ||
if(need_weightavg){ | ||
if(src_weightavg != NULL){ | ||
for(int w = 0; w < pair.num_weights; w++){ | ||
pair.weights1[w].d = *(weights1->weights[w] + n_off + j); | ||
} | ||
pair.dx.d = perpx; | ||
pair.dy.d = perpy; | ||
pair.dz.d = perpz; | ||
|
@@ -1306,28 +1279,19 @@ static inline int countpairs_rp_pi_mocks_fallback_DOUBLE(const int64_t N0, DOUBL | |
for(int kbin=nbin-1;kbin>=1;kbin--) { | ||
if(sqr_Dperp >= rupp_sqr[kbin-1]) { | ||
const int ibin = kbin*(npibin+1) + pibin; | ||
npairs[ibin]++; | ||
if(need_rpavg) { | ||
rpavg[ibin]+=rp; | ||
src_npairs[ibin]++; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess the idea here is that there's no point in making stack buffers, since the passed buffers are already local to the current thread? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yup. I am also considering whether it would be worthwhile to move to a malloc'ed buffer rather than the stack (but there may be side-effects of false sharing under OpenMP with such a malloc'ed |
||
if(src_rpavg != NULL) { | ||
src_rpavg[ibin] += rp; | ||
} | ||
if(need_weightavg){ | ||
weightavg[ibin] += pairweight; | ||
if(src_weightavg != NULL){ | ||
src_weightavg[ibin] += pairweight; | ||
} | ||
break; | ||
} | ||
}//finding kbin | ||
}//j loop over second set of particles | ||
}//i loop over first set of particles | ||
|
||
for(int i=0;i<totnbins;i++) { | ||
src_npairs[i] += npairs[i]; | ||
if(need_rpavg) { | ||
src_rpavg[i] += rpavg[i]; | ||
} | ||
if(need_weightavg){ | ||
src_weightavg[i] += weightavg[i]; | ||
} | ||
} | ||
|
||
return EXIT_SUCCESS; | ||
}//end of fallback code |
Uh oh!
There was an error while loading. Please reload this page.