Skip to content

Commit

Permalink
support for "splitFactor" parameterization
Browse files Browse the repository at this point in the history
  • Loading branch information
Mr-KIDBK committed Nov 17, 2020
1 parent 809cffc commit b4a3eeb
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,6 @@ public final class Constant {

public static String TABLE_NAME_PLACEHOLDER = "@table";

public static Integer SPLIT_FACTOR = 5;

}
Original file line number Diff line number Diff line change
Expand Up @@ -46,5 +46,6 @@ public final class Key {

public final static String DRYRUN = "dryRun";

public static String SPLIT_FACTOR = "splitFactor";

}
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,12 @@ public static List<Configuration> doSplit(
//eachTableShouldSplittedNumber = eachTableShouldSplittedNumber * 2 + 1;// 不应该加1导致长尾

//考虑其他比率数字?(splitPk is null, 忽略此长尾)
eachTableShouldSplittedNumber = eachTableShouldSplittedNumber * 5;
//eachTableShouldSplittedNumber = eachTableShouldSplittedNumber * 5;

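// The multiplier defaults to 5 (Constant.SPLIT_FACTOR) and can be overridden with the "splitFactor" job parameter (Key.SPLIT_FACTOR).
// To avoid producing many small files when importing into Hive, use a smaller factor: with splitFactor = 1 the task count matches the configured channel count.
// Resulting splits per table = ceil(channel / tableNum) * splitFactor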
Integer quota = originalSliceConfig.getInt(Key.SPLIT_FACTOR, Constant.SPLIT_FACTOR);
eachTableShouldSplittedNumber = eachTableShouldSplittedNumber * quota;
}
// 尝试对每个表,切分为eachTableShouldSplittedNumber 份
for (String table : tables) {
Expand Down

0 comments on commit b4a3eeb

Please sign in to comment.