diff --git a/docs/en/administration/management/FE_configuration.md b/docs/en/administration/management/FE_configuration.md index b0adef463fed2..53f5f19b82a48 100644 --- a/docs/en/administration/management/FE_configuration.md +++ b/docs/en/administration/management/FE_configuration.md @@ -2548,6 +2548,16 @@ ADMIN SET FRONTEND CONFIG ("key" = "value"); - Description: The percentage threshold for determining whether the load of a BE is balanced. If a BE has a lower load than the average load of all BEs and the difference is greater than this value, this BE is in a low load state. On the contrary, if a BE has a higher load than the average load and the difference is greater than this value, this BE is in a high load state. - Introduced in: - +##### tablet_sched_num_based_balance_threshold_ratio + +- Default: 0.5 +- Alias: - +- Type: Double +- Unit: - +- Is mutable: Yes +- Description: Doing num based balance may break the disk size balance, but the maximum gap between disks cannot exceed tablet_sched_num_based_balance_threshold_ratio * tablet_sched_balance_load_score_threshold. If there are tablets in the cluster that are constantly balancing from A to B and B to A, reduce this value. If you want the tablet distribution to be more balanced, increase this value. 
+- Introduced in: 3.1 + ##### tablet_sched_balance_load_disk_safe_threshold - Default: 0.5 diff --git a/docs/zh/administration/management/FE_configuration.md b/docs/zh/administration/management/FE_configuration.md index c4779529129fb..96e6a521b1193 100644 --- a/docs/zh/administration/management/FE_configuration.md +++ b/docs/zh/administration/management/FE_configuration.md @@ -2550,6 +2550,16 @@ ADMIN SET FRONTEND CONFIG ("key" = "value"); - 描述:用于判断 BE 负载是否均衡的百分比阈值。如果一个 BE 的负载低于所有 BE 的平均负载,且差值大于该阈值,则认为该 BE 处于低负载状态。相反,如果一个 BE 的负载比平均负载高且差值大于该阈值,则认为该 BE 处于高负载状态。 - 引入版本:- +##### tablet_sched_num_based_balance_threshold_ratio + +- 默认值:0.5 +- 别名:- +- 类型:Double +- 单位:- +- 是否动态:是 +- 描述:做分布均衡时可能会打破磁盘大小均衡,但磁盘间的最大差距不能超过 tablet_sched_num_based_balance_threshold_ratio * tablet_sched_balance_load_score_threshold。如果集群中存在不断从 A 到 B、从 B 到 A 的克隆,请减小该值。如果希望 tablet 分布更加均衡,请调大该值。 +- 引入版本:3.1 + ##### tablet_sched_balance_load_disk_safe_threshold - 默认值:0.5 diff --git a/fe/fe-core/src/main/java/com/starrocks/clone/DiskAndTabletLoadReBalancer.java b/fe/fe-core/src/main/java/com/starrocks/clone/DiskAndTabletLoadReBalancer.java index b1595eba56438..d895abf3d161f 100644 --- a/fe/fe-core/src/main/java/com/starrocks/clone/DiskAndTabletLoadReBalancer.java +++ b/fe/fe-core/src/main/java/com/starrocks/clone/DiskAndTabletLoadReBalancer.java @@ -1964,9 +1964,11 @@ public boolean check(Long src, Long dest, Long size) { return true; } - // this will make disk balance worse, but can not exceed 0.9 * Config.balance_load_score_threshold; + // this will make disk balance worse, but can not exceed + // Config.tablet_sched_num_based_balance_threshold_ratio * Config.tablet_sched_balance_load_score_threshold; return maxUsedPercentAfterBalance - minUsedPercentAfterBalance < - 0.9 * Config.tablet_sched_balance_load_score_threshold; + Config.tablet_sched_num_based_balance_threshold_ratio * + Config.tablet_sched_balance_load_score_threshold; } public void moveReplica(Long src, Long dest, Long size) { diff --git 
a/fe/fe-core/src/main/java/com/starrocks/common/Config.java b/fe/fe-core/src/main/java/com/starrocks/common/Config.java index e2690d2af7248..058c1730105e8 100644 --- a/fe/fe-core/src/main/java/com/starrocks/common/Config.java +++ b/fe/fe-core/src/main/java/com/starrocks/common/Config.java @@ -1463,7 +1463,6 @@ public class Config extends ConfigBase { public static long tablet_sched_max_not_being_scheduled_interval_ms = 15 * 60 * 1000; /** - * FOR DiskAndTabletLoadBalancer: * upper limit of the difference in disk usage of all backends, exceeding this threshold will cause * disk balance */ @@ -1471,7 +1470,6 @@ public class Config extends ConfigBase { public static double tablet_sched_balance_load_score_threshold = 0.1; // 10% /** - * For DiskAndTabletLoadBalancer: * if all backends disk usage is lower than this threshold, disk balance will never happen */ @ConfField(mutable = true, aliases = {"balance_load_disk_safe_threshold"}) @@ -1512,6 +1510,16 @@ public class Config extends ConfigBase { @ConfField(mutable = true) public static long tablet_sched_consecutive_full_clone_delay_sec = 180; // 3min + /** + * Doing num based balance may break the disk size balance, + * but the maximum gap between disks cannot exceed + * tablet_sched_num_based_balance_threshold_ratio * tablet_sched_balance_load_score_threshold. + * If there are tablets in the cluster that are constantly balancing from A to B and B to A, reduce this value. + * If you want the tablet distribution to be more balanced, increase this value. + */ + @ConfField(mutable = true) + public static double tablet_sched_num_based_balance_threshold_ratio = 0.5; + @ConfField(mutable = true, comment = "How much time we should wait before dropping the tablet from BE on tablet report") public static long tablet_report_drop_tablet_delay_sec = 120;