Skip to content

Commit

Permalink
[Enhancement] Enlarge default virtual node number to be more friendly to …
Browse files Browse the repository at this point in the history
…cache (StarRocks#36101)

Signed-off-by: zombee0 <ewang2027@gmail.com>
  • Loading branch information
zombee0 authored Dec 1, 2023
1 parent 4a5a0ea commit de3faf1
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ public class HDFSBackendSelector implements BackendSelector {
// After testing, this value ensures that the scan range size assigned to each BE is as uniform as possible,
// and that the largest amount of scan data is no more than 1.1 times the average value
private final double kMaxImbalanceRatio = 1.1;
public static final int CONSISTENT_HASH_RING_VIRTUAL_NUMBER = 32;
public static final int CONSISTENT_HASH_RING_VIRTUAL_NUMBER = 128;

class HdfsScanRangeHasher {
String basePath;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1181,7 +1181,7 @@ public static MaterializedViewRewriteMode parse(String str) {
private String hdfsBackendSelectorHashAlgorithm = "consistent";

@VariableMgr.VarAttr(name = CONSISTENT_HASH_VIRTUAL_NUMBER, flag = VariableMgr.INVISIBLE)
private int consistentHashVirtualNodeNum = 32;
private int consistentHashVirtualNodeNum = 128;

// binary, json, compact,
@VarAttr(name = THRIFT_PLAN_PROTOCOL)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public class HDFSBackendSelectorTest {
private ConnectContext context;
final int scanNodeId = 0;
final int computeNodePort = 9030;
final String hostFormat = "Host%02d";
final String hostFormat = "192.168.1.%02d";

private List<TScanRangeLocations> createScanRanges(long number, long size) {
List<TScanRangeLocations> ans = new ArrayList<>();
Expand Down Expand Up @@ -117,7 +117,7 @@ public void testHdfsScanNodeHashRing() throws Exception {
}
};

int scanRangeNumber = 100;
int scanRangeNumber = 10000;
int scanRangeSize = 10000;
int hostNumber = 3;
List<TScanRangeLocations> locations = createScanRanges(scanRangeNumber, scanRangeSize);
Expand All @@ -136,11 +136,11 @@ public void testHdfsScanNodeHashRing() throws Exception {
selector.computeScanRangeAssignment();

int avg = (scanRangeNumber * scanRangeSize) / hostNumber;
int variance = 5 * scanRangeSize;
double variance = 0.2 * avg;
Map<Long, Long> stats = computeWorkerIdToReadBytes(assignment, scanNodeId);
for (Map.Entry<Long, Long> entry : stats.entrySet()) {
System.out.printf("%s -> %d bytes\n", entry.getKey(), entry.getValue());
Assert.assertTrue(Math.abs(entry.getValue() - avg) < variance);
Assert.assertTrue(entry.getValue() - avg < variance);
}

// test empty compute nodes
Expand Down Expand Up @@ -183,7 +183,7 @@ public void testHdfsScanNodeScanRangeReBalance() throws Exception {
}
};

long scanRangeNumber = 100;
long scanRangeNumber = 10000;
long scanRangeSize = 10000;
int hostNumber = 3;
List<TScanRangeLocations> locations = createScanRanges(scanRangeNumber, scanRangeSize);
Expand All @@ -206,14 +206,16 @@ public void testHdfsScanNodeScanRangeReBalance() throws Exception {
Map<Long, Long> stats = computeWorkerIdToReadBytes(assignment, scanNodeId);
for (Map.Entry<Long, Long> entry : stats.entrySet()) {
System.out.printf("%s -> %d bytes\n", entry.getKey(), entry.getValue());
Assert.assertTrue(Math.abs(entry.getValue() - avg) < variance);
Assert.assertTrue((entry.getValue() - avg) < variance);
}

variance = 2 * scanRangeSize;
variance = 0.4 / 100 * scanRangeNumber * scanRangeSize;
double actual = 0;
for (Map.Entry<ComputeNode, Long> entry : selector.reBalanceBytesPerComputeNode.entrySet()) {
System.out.printf("%s -> %d bytes re-balance\n", entry.getKey(), entry.getValue());
Assert.assertTrue(entry.getValue() <= variance);
actual = actual + entry.getValue();
}
Assert.assertTrue(actual < variance);
}

@Test
Expand Down

0 comments on commit de3faf1

Please sign in to comment.