-
Notifications
You must be signed in to change notification settings - Fork 0
Natural Merge Sort with Lists
A natural merge sort takes advantage of the fact that random input data will have many short runs of rows that just happen to be sorted. This version of the sort algorithm uses a list to represent the data's sort order. Using a list to show the sort order is generally more versatile than physically rearranging the data.
Consider the following data set which we'll sort by Category and Item
element Category Item 1 Vegetable Tree 2 Animal Bird 3 Mineral Diamond 4 Vegetable Flower 5 Vegetable Grass 6 Animal Cat 7 Animal Dog 8 Mineral Ruby 9 Mineral Quartz
The algorithm requires two arrays for storing the sort results, group and nextLine. The length of each array matches the length of the data set.
The sorting algorithm starts by determining natural groupings found within the data set. Each group represents a sorted run of adjacent data set rows. In our example, four natural groupings exist. They are reflected in the group/nextLine list.
element Category Item group nextLine 1 Vegetable Tree 1 0 2 Animal Bird 2 3 3 Mineral Diamond 6 4 4 Vegetable Flower 9 5 5 Vegetable Grass 0 6 Animal Cat 7 7 Animal Dog 8 8 Mineral Ruby 0 9 Mineral Quartz 0
The next step in the sort process is to repeatedly merge adjacent sub-lists to produce new sorted sub-lists until there is only one sub-list remaining. This last remaining sub-list represents the sort order of the data.
element Category Item group nextLine 1 Vegetable Tree 2 0 2 Animal Bird 6 3 3 Mineral Diamond 4 4 Vegetable Flower 5 5 Vegetable Grass 1 6 Animal Cat 7 7 Animal Dog 9 8 Mineral Ruby 0 9 Mineral Quartz 8
element Category Item group nextLine 1 Vegetable Tree 2 0 2 Animal Bird 6 3 Mineral Diamond 9 4 Vegetable Flower 5 5 Vegetable Grass 1 6 Animal Cat 7 7 Animal Dog 3 8 Mineral Ruby 4 9 Mineral Quartz 8
The first element of group is the line number of the first data row in the sort order, in this case 2. We go to the second element of nextLine to see that the next row number in the sort is 6. We go to the sixth element of nextLine to see that the next row number is 7, and so on. A nextLine value of zero (0) signifies the end of the list.
// Sort data items. group and nextLine keep the new sort sequence.
//
// rank may also keep the sort sequence.
// rank[i]=n; where n is the ith member of the sorted data.
//
// data = The data to sort
// compareFunc = Comparison function
// sortColumns = the data columns to sort by
Roots.mrsort = function(data, compareFunc, sortColumns, group, nextLine, rank) {
var count;
if (typeof data.length === "function")
count = data.length();
else
count = data.length;
if (count <= 0)
throw "Roots.mrsort: Bad input!";
// Only one item in data?
if (count === 1) {
group[0] = 1;
nextLine[0] = 0;
if (rank === undefined)
return;
rank[0] = 1;
return;
}
var i, a, b, last, g, ngroup, pair, npair;
ngroup = group[0] = 1;
g = nextLine[0] = 0;
// Determine the natural groupings
for (a = 0, b = 1; b < count; ++a, ++b) {
if (compareFunc(data, b, a, sortColumns) < 0) {
// New group
group[++g] = b + 1;
nextLine[a] = 0;
++ngroup;
}
// Same group
else
nextLine[a] = b + 1;
}
nextLine[a] = 0;
// Merge Phase
for (npair = Math.floor(ngroup / 2); npair > 0; npair = Math.floor(ngroup / 2)) {
g = -1;
// Merge group A with group B
for (pair = 0; pair < npair; ++pair) {
a = group[++g];
b = group[++g];
// Initial compare
if (compareFunc(data, b-1, a-1, sortColumns) < 0) {
// B is the 1st member of a new group
group[pair] = b;
last = b;
b = nextLine[b - 1];
// No more B? Append the rest of A
if (b === 0) {
nextLine[last - 1] = a;
continue;
}
}
// A is the 1st member of a new group
else {
group[pair] = a;
last = a;
a = nextLine[a - 1];
// No more A? Append the rest of B
if (a === 0) {
nextLine[last - 1] = b;
continue;
}
}
// More line compares
for (;;) {
// Append B to an established group
if (compareFunc(data, b-1, a-1, sortColumns) < 0) {
nextLine[last - 1] = b;
last = b;
b = nextLine[b - 1];
// No more B?
if (b === 0) {
nextLine[last - 1] = a;
break;
}
}
// Append A to an established group
else {
nextLine[last - 1] = a;
last = a;
a = nextLine[a - 1];
// No more A?
if (a === 0) {
nextLine[last - 1] = b;
break;
}
}
}
} // Next group pair
// Reset ngroup for the next pass
// There was an unused group. Add the odd group to new groups.
if (ngroup % 2 != 0) {
group[npair] = group[ngroup - 1];
ngroup = npair + 1;
}
// The groups paired evenly
else
ngroup = npair;
} // Next pass for merge process
// The data is sorted
// Derive table item rankings (optional)
if (rank === undefined)
return;
for (a = group[0], i = 0; a != 0; a = nextLine[a - 1])
rank[i++] = a;
}