HDFS-16858. Dynamically adjust max slow disks to exclude. #5180

Merged · 4 commits · Dec 9, 2022
Changes from 2 commits
DataNode.java
@@ -72,6 +72,8 @@
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_STARTUP_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT;
@@ -353,6 +355,7 @@ public class DataNode extends ReconfigurableBase
DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY,
DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY,
DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY,
DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY,
FS_DU_INTERVAL_KEY,
FS_GETSPACEUSED_JITTER_KEY,
FS_GETSPACEUSED_CLASSNAME));
@@ -699,6 +702,7 @@ public String reconfigurePropertyImpl(String property, String newVal)
case DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY:
case DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY:
case DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY:
case DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY:
return reconfSlowDiskParameters(property, newVal);
case FS_DU_INTERVAL_KEY:
case FS_GETSPACEUSED_JITTER_KEY:
@@ -877,6 +881,12 @@ private String reconfSlowDiskParameters(String property, String newVal)
Long.parseLong(newVal));
result = Long.toString(threshold);
diskMetrics.setLowThresholdMs(threshold);
} else if (property.equals(DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY)) {
checkNotNull(diskMetrics, "DataNode disk stats may be disabled.");
int maxSlowDisksToExclude = (newVal == null ? DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_DEFAULT :
Integer.parseInt(newVal));
result = Integer.toString(maxSlowDisksToExclude);
diskMetrics.setMaxSlowDisksToExclude(maxSlowDisksToExclude);
}
LOG.info("RECONFIGURE* changed {} to {}", property, newVal);
return result;
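For orientation, a minimal usage sketch follows; it is not part of the patch. It relies only on calls visible in this diff: reconfigureProperty() inherited from ReconfigurableBase, getDiskMetrics(), and the new getter on DataNodeDiskMetrics. The value "3" is an arbitrary illustration.

```java
// Minimal sketch, not part of this patch: exercises the reconfiguration path added above.
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY;

import org.apache.hadoop.conf.ReconfigurationException;
import org.apache.hadoop.hdfs.server.datanode.DataNode;

public class MaxSlowDisksReconfSketch {
  static void adjustAtRuntime(DataNode dn) throws ReconfigurationException {
    // Only works because the key was added to RECONFIGURABLE_PROPERTIES above;
    // "3" is an arbitrary example value.
    dn.reconfigureProperty(DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY, "3");

    // The new value is pushed into the disk metrics object by reconfSlowDiskParameters().
    int effective = dn.getDiskMetrics().getMaxSlowDisksToExclude();
    assert effective == 3;

    // Passing null reverts to DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_DEFAULT,
    // per the null handling in the hunk above.
    dn.reconfigureProperty(DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY, null);
  }
}
```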
DataNodeDiskMetrics.java
@@ -80,7 +80,7 @@ public class DataNodeDiskMetrics {
/**
* The number of slow disks that needs to be excluded.
*/
- private int maxSlowDisksToExclude;
+ private volatile int maxSlowDisksToExclude;
/**
* List of slow disks that need to be excluded.
*/
@@ -274,6 +274,14 @@ public List<String> getSlowDisksToExclude() {
return slowDisksToExclude;
}

public int getMaxSlowDisksToExclude() {
return maxSlowDisksToExclude;
}

public void setMaxSlowDisksToExclude(int maxSlowDisksToExclude) {
this.maxSlowDisksToExclude = maxSlowDisksToExclude;
}

public void setLowThresholdMs(long thresholdMs) {
Preconditions.checkArgument(thresholdMs > 0,
DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY + " should be larger than 0");
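Making the field volatile matters because it is now written by the reconfiguration handler while the periodic disk-outlier detection pass reads it. The hypothetical, simplified sketch below (not the real DataNodeDiskMetrics logic) shows the pattern this enables: the limit is re-read on each detection cycle, so a reconfigured value takes effect on the next pass without restarting the DataNode.

```java
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

// Hypothetical, simplified sketch; not the actual DataNodeDiskMetrics implementation.
// It only illustrates the concurrency pattern the volatile field enables.
class SlowDiskExclusionSketch {
  private volatile int maxSlowDisksToExclude;       // written by the reconfiguration thread

  void setMaxSlowDisksToExclude(int maxSlowDisksToExclude) {
    this.maxSlowDisksToExclude = maxSlowDisksToExclude;
  }

  // Called from the periodic detection pass with per-disk outlier latencies (illustrative input).
  List<String> pickDisksToExclude(Map<String, Double> diskLatencyMs) {
    int limit = maxSlowDisksToExclude;              // re-read once per cycle
    List<Map.Entry<String, Double>> byLatency = new ArrayList<>(diskLatencyMs.entrySet());
    byLatency.sort((a, b) -> Double.compare(b.getValue(), a.getValue()));   // slowest first
    List<String> excluded = new ArrayList<>();
    for (Map.Entry<String, Double> e : byLatency) {
      if (excluded.size() >= limit) {
        break;
      }
      excluded.add(e.getKey());
    }
    return excluded;
  }
}
```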
DataNode reconfiguration test (testSlowDiskParameters)
@@ -45,6 +45,7 @@
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
@@ -636,13 +637,15 @@ public void testSlowDiskParameters() throws ReconfigurationException, IOException
String[] slowDisksParameters2 = {
DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY,
DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY,
- DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY};
+ DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY,
+ DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY};
for (String parameter : slowDisksParameters2) {
dn.reconfigureProperty(parameter, "99");
}
// Assert diskMetrics.
assertEquals(99, dn.getDiskMetrics().getMinOutlierDetectionDisks());
assertEquals(99, dn.getDiskMetrics().getLowThresholdMs());
assertEquals(99, dn.getDiskMetrics().getMaxSlowDisksToExclude());
// Assert dnConf.
assertTrue(dn.getDnConf().diskStatsEnabled);
// Assert profilingEventHook.
@@ -673,12 +676,16 @@ public void testSlowDiskParameters() throws ReconfigurationException, IOException
dn.reconfigureProperty(DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY, "1");
dn.reconfigureProperty(DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY, null);
dn.reconfigureProperty(DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY, null);
dn.reconfigureProperty(DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY, null);
assertEquals(String.format("expect %s is not configured",
DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY), null,
dn.getConf().get(DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY));
assertEquals(String.format("expect %s is not configured",
DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY), null,
dn.getConf().get(DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY));
assertEquals(String.format("expect %s is not configured",
DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY), null,
dn.getConf().get(DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY));
assertEquals(DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_DEFAULT,
dn.getDiskMetrics().getSlowDiskDetector().getMinOutlierDetectionNodes());
assertEquals(DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_DEFAULT,
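Operationally, the property behind DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY can now be changed on a live DataNode through the standard reconfiguration workflow: update the value in the DataNode's configuration, run `hdfs dfsadmin -reconfig datanode <host:ipc_port> start`, and poll `hdfs dfsadmin -reconfig datanode <host:ipc_port> status` until the change is reported as applied. This is the same generic mechanism the test above drives directly via reconfigureProperty(); the exact command form may vary by release.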