Skip to content

Commit

Permalink
More variable-length arrays in forder
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaelChirico committed Jul 17, 2024
1 parent a01f00f commit 5ea35ee
Showing 1 changed file with 23 additions and 5 deletions.
28 changes: 23 additions & 5 deletions src/forder.c
Original file line number Diff line number Diff line change
Expand Up @@ -845,7 +845,9 @@ void radix_r(const int from, const int to, const int radix) {
#endif

uint8_t *restrict my_key = key[radix]+from; // safe to write as we don't use this radix again
uint8_t o[my_n];
uint8_t *o = (uint8_t *)malloc(my_n * sizeof(uint8_t));
if (!o)
STOP(_("Failed to allocate %d bytes for '%s'."), (int)(my_n * sizeof(uint8_t)), "o");
// if last key (i.e. radix+1==nradix) there are no more keys to reorder so we could reorder osub by reference directly and save allocating and populating o just
// to use it once. However, o's type is uint8_t so many moves within this max-256 vector should be faster than many moves in osub (4 byte or 8 byte ints) [1 byte
// type is always aligned]
Expand Down Expand Up @@ -912,7 +914,11 @@ void radix_r(const int from, const int to, const int radix) {
}
if (!skip) {
// reorder osub and each remaining ksub
int TMP[my_n]; // on stack fine since my_n is very small (<=256)
int *TMP = malloc(my_n * sizeof(int));
if (!TMP) {
free(o);
STOP(_("Failed to allocate %d bytes for '%s'."), my_n * sizeof(int), "TMP");
}
const int *restrict osub = anso+from;
for (int i=0; i<my_n; i++) TMP[i] = osub[o[i]];
memcpy((int *restrict)(anso+from), TMP, my_n*sizeof(int));
Expand All @@ -921,14 +927,19 @@ void radix_r(const int from, const int to, const int radix) {
for (int i=0; i<my_n; i++) ((uint8_t *)TMP)[i] = ksub[o[i]];
memcpy((uint8_t *restrict)(key[r]+from), (uint8_t *)TMP, my_n);
}
free(TMP);
TEND(8)
}
free(o);
// my_key is now grouped (and sorted by group too if sort!=0)
// all we have left to do is find the group sizes and either recurse or push
if (radix+1==nradix && !retgrp) {
return;
}
int ngrp=0, my_gs[my_n]; //minor TODO: could know number of groups with certainty up above
int ngrp=0; //minor TODO: could know number of groups with certainty up above
int *my_gs = malloc(my_n * sizeof(int));
if (!my_gs)
STOP(_("Failed to allocate %d bytes for '%s'."), my_n * sizeof(int), "my_gs");
my_gs[ngrp]=1;
for (int i=1; i<my_n; i++) {
if (my_key[i]!=my_key[i-1]) my_gs[++ngrp] = 1;
Expand All @@ -944,6 +955,7 @@ void radix_r(const int from, const int to, const int radix) {
f+=my_gs[i];
}
}
free(my_gs);
return;
}
else if (my_n<=UINT16_MAX) { // UINT16_MAX==65535 (important not 65536)
Expand Down Expand Up @@ -1027,7 +1039,9 @@ void radix_r(const int from, const int to, const int radix) {
if (!retgrp && radix+1==nradix) {
return; // we're done. avoid allocating and populating very last group sizes for last key
}
int my_gs[ngrp==0 ? 256 : ngrp]; // ngrp==0 when sort and skip==true; we didn't count the non-zeros in my_counts yet in that case
int *my_gs = malloc((ngrp==0 ? 256 : ngrp) * sizeof(int)); // ngrp==0 when sort and skip==true; we didn't count the non-zeros in my_counts yet in that case
if (!my_gs)
STOP(_("Failed to allocate %d bytes for '%s'."), (ngrp==0 ? 256 : ngrp) * sizeof(int), "my_gs");
if (sortType!=0) {
ngrp=0;
for (int i=0; i<256; i++) if (my_counts[i]) my_gs[ngrp++]=my_counts[i]; // this casts from uint16_t to int32, too
Expand All @@ -1045,6 +1059,7 @@ void radix_r(const int from, const int to, const int radix) {
my_from+=my_gs[i];
}
}
free(my_gs);
return;
}
// else parallel batches. This is called recursively but only once or maybe twice before resolving to UINT16_MAX branch above
Expand Down Expand Up @@ -1211,7 +1226,9 @@ void radix_r(const int from, const int to, const int radix) {
TEND(19 + notFirst*3)
notFirst = true;

int my_gs[ngrp];
int *my_gs = malloc(ngrp * sizeof(int));
if (!my_gs)
STOP(_("Failed to allocate %d bytes for '%s'."), ngrp * sizeof(int), "my_gs");
for (int i=1; i<ngrp; i++) my_gs[i-1] = starts[ugrp[i]] - starts[ugrp[i-1]]; // use the first row of starts to get totals
my_gs[ngrp-1] = my_n - starts[ugrp[ngrp-1]];

Expand Down Expand Up @@ -1265,6 +1282,7 @@ void radix_r(const int from, const int to, const int radix) {
TEND(25)
}
}
free(my_gs);
free(counts);
free(starts);
free(ugrps);
Expand Down

0 comments on commit 5ea35ee

Please sign in to comment.