Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,6 @@ public void generateKeysForBlock(TransformBlock transformBlock, int[] groupKeys)
BlockValSet blockValueSet = transformBlock.getBlockValueSet(_groupByExpressions[i]);
_singleValueDictIds[i] = blockValueSet.getDictionaryIdsSV();
}

_rawKeyHolder.processSingleValue(transformBlock.getNumDocs(), groupKeys);
}

Expand Down Expand Up @@ -253,12 +252,29 @@ private interface RawKeyHolder {
}

private class ArrayBasedHolder implements RawKeyHolder {
// TODO: using bitmap might better
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we consider using BitSet to replace this boolean array?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No it’s not a good idea because it creates data dependencies between any contiguous set of 64 groups as they all need to update the same word. I tried it and it was about 30% worse.

private final boolean[] _flags = new boolean[_globalGroupIdUpperBound];
private int _numKeys = 0;

@Override
public void processSingleValue(int numDocs, int[] outGroupIds) {
if (_numGroupByExpressions == 1) {
processSingleValue(numDocs, _singleValueDictIds[0], outGroupIds);
} else {
processSingleValueGeneric(numDocs, outGroupIds);
}
}

private void processSingleValue(int numDocs, int[] dictIds, int[] outGroupIds) {
System.arraycopy(dictIds, 0, outGroupIds, 0, numDocs);
for (int i = 0; i < numDocs; i++) {
if (!_flags[outGroupIds[i]]) {
_numKeys++;
_flags[outGroupIds[i]] = true;
}
}
}

private void processSingleValueGeneric(int numDocs, int[] outGroupIds) {
for (int i = 0; i < numDocs; i++) {
int groupId = 0;
for (int j = _numGroupByExpressions - 1; j >= 0; j--) {
Expand Down Expand Up @@ -377,6 +393,20 @@ public IntMapBasedHolder(IntGroupIdMap groupIdMap) {

@Override
public void processSingleValue(int numDocs, int[] outGroupIds) {
if (_numGroupByExpressions == 1) {
processSingleValue(numDocs, _singleValueDictIds[0], outGroupIds);
} else {
processSingleValueGeneric(numDocs, outGroupIds);
}
}

private void processSingleValue(int numDocs, int[] dictIds, int[] outGroupIds) {
for (int i = 0; i < numDocs; i++) {
outGroupIds[i] = _groupIdMap.getGroupId(dictIds[i], _globalGroupIdUpperBound);
}
}

private void processSingleValueGeneric(int numDocs, int[] outGroupIds) {
for (int i = 0; i < numDocs; i++) {
int rawKey = 0;
for (int j = _numGroupByExpressions - 1; j >= 0; j--) {
Expand Down Expand Up @@ -612,7 +642,8 @@ public void processMultiValue(int numDocs, int[][] outGroupIds) {
private int getGroupId(long rawKey) {
int numGroups = _groupIdMap.size();
if (numGroups < _globalGroupIdUpperBound) {
return _groupIdMap.computeIfAbsent(rawKey, k -> numGroups);
int id = _groupIdMap.putIfAbsent(rawKey, numGroups);
return id == INVALID_ID ? numGroups : id;
} else {
return _groupIdMap.get(rawKey);
}
Expand Down