Skip to content

Commit

Permalink
HDDS-1178. Healthy pipeline Chill Mode Rule.
Browse files Browse the repository at this point in the history
Closes #518
  • Loading branch information
bharatviswa504 authored and elek committed Feb 27, 2019
1 parent 0e45020 commit 6c8c422
Show file tree
Hide file tree
Showing 10 changed files with 315 additions and 47 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,15 @@ public final class HddsConfigKeys {
public static final String HDDS_SCM_CHILLMODE_THRESHOLD_PCT =
"hdds.scm.chillmode.threshold.pct";
public static final double HDDS_SCM_CHILLMODE_THRESHOLD_PCT_DEFAULT = 0.99;


// percentage of healthy pipelines, where all 3 datanodes are reported in the
// pipeline.
public static final String HDDS_SCM_CHILLMODE_HEALTHY_PIPELINE_THRESHOLD_PCT =
"hdds.scm.chillmode.healthy.pipelie.pct";
public static final double
HDDS_SCM_CHILLMODE_HEALTHY_PIPELINE_THRESHOLD_PCT_DEFAULT = 0.10;

public static final String HDDS_LOCK_MAX_CONCURRENCY =
"hdds.lock.max.concurrency";
public static final int HDDS_LOCK_MAX_CONCURRENCY_DEFAULT = 100;
Expand Down
10 changes: 10 additions & 0 deletions hadoop-hdds/common/src/main/resources/ozone-default.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1315,6 +1315,16 @@
</description>
</property>

<property>
<name>hdds.scm.chillmode.healthy.pipelie.pct</name>
<value>0.10</value>
<tag>HDDS,SCM,OPERATION</tag>
<description>
Percentage of healthy pipelines, where all 3 datanodes are reported in the
pipeline.
</description>
</property>

<property>
<name>hdds.container.action.max.limit</name>
<value>20</value>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@
*/
package org.apache.hadoop.hdds.scm.chillmode;

import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReport;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReportsProto;
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
Expand All @@ -30,33 +31,82 @@
import org.apache.hadoop.hdds.server.events.EventPublisher;

import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Class defining Chill mode exit criteria for Pipelines.
*
* This rule defines percentage of healthy pipelines need to be reported.
* Once chill mode exit happens, this rules take care of writes can go
* through in a cluster.
*/
public class PipelineChillModeRule
public class HealthyPipelineChillModeRule
implements ChillModeExitRule<PipelineReportFromDatanode>,
EventHandler<PipelineReportFromDatanode> {
/** Pipeline availability.*/
private AtomicBoolean isPipelineAvailable = new AtomicBoolean(false);

private static final Logger LOG =
LoggerFactory.getLogger(HealthyPipelineChillModeRule.class);
private final PipelineManager pipelineManager;
private final SCMChillModeManager chillModeManager;
private final int healthyPipelineThresholdCount;
private int currentHealthyPipelineCount = 0;

PipelineChillModeRule(PipelineManager pipelineManager,
SCMChillModeManager manager) {
HealthyPipelineChillModeRule(PipelineManager pipelineManager,
SCMChillModeManager manager, Configuration configuration) {
this.pipelineManager = pipelineManager;
this.chillModeManager = manager;
double healthyPipelinesPercent =
configuration.getDouble(HddsConfigKeys.
HDDS_SCM_CHILLMODE_HEALTHY_PIPELINE_THRESHOLD_PCT,
HddsConfigKeys.
HDDS_SCM_CHILLMODE_HEALTHY_PIPELINE_THRESHOLD_PCT_DEFAULT);

// As we want to wait for 3 node pipelines
int pipelineCount =
pipelineManager.getPipelines(HddsProtos.ReplicationType.RATIS,
HddsProtos.ReplicationFactor.THREE).size();

// This value will be zero when pipeline count is 0.
// On a fresh installed cluster, there will be zero pipelines in the SCM
// pipeline DB.
healthyPipelineThresholdCount =
(int) Math.ceil((healthyPipelinesPercent / 100) * pipelineCount);

LOG.info(" Total pipeline count is {}, healthy pipeline " +
"threshold count is {}", pipelineCount, healthyPipelineThresholdCount);
}

@Override
public boolean validate() {
return isPipelineAvailable.get();
if (currentHealthyPipelineCount >= healthyPipelineThresholdCount) {
return true;
}
return false;
}

@Override
public void process(PipelineReportFromDatanode report) {
// No need to deal with
public void process(PipelineReportFromDatanode pipelineReportFromDatanode) {
Pipeline pipeline;
Preconditions.checkNotNull(pipelineReportFromDatanode);
PipelineReportsProto pipelineReport =
pipelineReportFromDatanode.getReport();

for (PipelineReport report : pipelineReport.getPipelineReportList()) {
PipelineID pipelineID = PipelineID
.getFromProtobuf(report.getPipelineID());
try {
pipeline = pipelineManager.getPipeline(pipelineID);
} catch (PipelineNotFoundException e) {
continue;
}

if (pipeline.getPipelineState() == Pipeline.PipelineState.OPEN) {
// If the pipeline is open state mean, all 3 datanodes are reported
// for this pipeline.
currentHealthyPipelineCount++;
}
}
}

@Override
Expand All @@ -67,38 +117,22 @@ public void cleanup() {
@Override
public void onMessage(PipelineReportFromDatanode pipelineReportFromDatanode,
EventPublisher publisher) {
// If we are already in pipeline available state,
// skipping following check.
// If we have already reached healthy pipeline threshold, skip processing
// pipeline report from datanode.

if (validate()) {
chillModeManager.validateChillModeExitRules(publisher);
return;
}

Pipeline pipeline;
Preconditions.checkNotNull(pipelineReportFromDatanode);
PipelineReportsProto pipelineReport = pipelineReportFromDatanode
.getReport();

for (PipelineReport report : pipelineReport.getPipelineReportList()) {
PipelineID pipelineID = PipelineID
.getFromProtobuf(report.getPipelineID());
try {
pipeline = pipelineManager.getPipeline(pipelineID);
} catch (PipelineNotFoundException e) {
continue;
}
// Process pipeline report from datanode
process(pipelineReportFromDatanode);

if (pipeline.getPipelineState() == Pipeline.PipelineState.OPEN) {
// ensure there is an OPEN state pipeline and then allowed
// to exit chill mode
isPipelineAvailable.set(true);

if (chillModeManager.getInChillMode()) {
SCMChillModeManager.getLogger()
.info("SCM in chill mode. 1 Pipeline reported, 1 required.");
}
break;
}
if (chillModeManager.getInChillMode()) {
SCMChillModeManager.getLogger().info(
"SCM in chill mode. Healthy pipelines reported count is {}, " +
"required healthy pipeline reported count is {}",
currentHealthyPipelineCount, healthyPipelineThresholdCount);
}

if (validate()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ public class SCMChillModeManager implements
private Configuration config;
private static final String CONT_EXIT_RULE = "ContainerChillModeRule";
private static final String DN_EXIT_RULE = "DataNodeChillModeRule";
private static final String PIPELINE_EXIT_RULE = "PipelineChillModeRule";
private static final String HEALTHY_PIPELINE_EXIT_RULE =
"HealthyPipelineChillModeRule";

private final EventQueue eventPublisher;
private final PipelineManager pipelineManager;
Expand All @@ -83,10 +84,10 @@ public SCMChillModeManager(Configuration conf,
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK,
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK_DEFAULT)
&& pipelineManager != null) {
PipelineChillModeRule rule = new PipelineChillModeRule(pipelineManager,
this);
exitRules.put(PIPELINE_EXIT_RULE, rule);
eventPublisher.addHandler(SCMEvents.PIPELINE_REPORT, rule);
HealthyPipelineChillModeRule rule = new HealthyPipelineChillModeRule(
pipelineManager, this, config);
exitRules.put(HEALTHY_PIPELINE_EXIT_RULE, rule);
eventPublisher.addHandler(SCMEvents.PROCESSED_PIPELINE_REPORT, rule);
}
emitChillModeStatus();
} else {
Expand Down Expand Up @@ -172,4 +173,10 @@ public double getCurrentContainerThreshold() {
.getCurrentContainerThreshold();
}

@VisibleForTesting
public HealthyPipelineChillModeRule getHealthyPipelineChillModeRule() {
return (HealthyPipelineChillModeRule)
exitRules.get(HEALTHY_PIPELINE_EXIT_RULE);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,14 @@ public final class SCMEvents {
public static final TypedEvent<PipelineReportFromDatanode> PIPELINE_REPORT =
new TypedEvent<>(PipelineReportFromDatanode.class, "Pipeline_Report");

/**
* PipelineReport processed by pipeline report handler. This event is
* received by HealthyPipelineChillModeRule.
*/
public static final TypedEvent<PipelineReportFromDatanode>
PROCESSED_PIPELINE_REPORT = new TypedEvent<>(
PipelineReportFromDatanode.class, "Processed_Pipeline_Report");

/**
* PipelineActions are sent by Datanode. This event is received by
* SCMDatanodeHeartbeatDispatcher and PIPELINE_ACTIONS event is generated.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,14 @@

import com.google.common.base.Preconditions;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto
.StorageContainerDatanodeProtocolProtos.PipelineReport;
import org.apache.hadoop.hdds.protocol.proto
.StorageContainerDatanodeProtocolProtos.PipelineReportsProto;
import org.apache.hadoop.hdds.scm.chillmode.SCMChillModeManager;
import org.apache.hadoop.hdds.scm.events.SCMEvents;
import org.apache.hadoop.hdds.scm.server
.SCMDatanodeHeartbeatDispatcher.PipelineReportFromDatanode;
import org.apache.hadoop.hdds.server.events.EventHandler;
Expand All @@ -33,6 +36,7 @@
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Objects;

/**
* Handles Pipeline Reports from datanode.
Expand All @@ -44,12 +48,21 @@ public class PipelineReportHandler implements
.getLogger(PipelineReportHandler.class);
private final PipelineManager pipelineManager;
private final Configuration conf;
private final SCMChillModeManager scmChillModeManager;
private final boolean pipelineAvailabilityCheck;

public PipelineReportHandler(PipelineManager pipelineManager,
public PipelineReportHandler(SCMChillModeManager scmChillModeManager,
PipelineManager pipelineManager,
Configuration conf) {
Preconditions.checkNotNull(pipelineManager);
Objects.requireNonNull(scmChillModeManager);
this.scmChillModeManager = scmChillModeManager;
this.pipelineManager = pipelineManager;
this.conf = conf;
this.pipelineAvailabilityCheck = conf.getBoolean(
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK,
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK_DEFAULT);

}

@Override
Expand All @@ -70,6 +83,11 @@ public void onMessage(PipelineReportFromDatanode pipelineReportFromDatanode,
report, dn, e);
}
}
if (pipelineAvailabilityCheck && scmChillModeManager.getInChillMode()) {
publisher.fireEvent(SCMEvents.PROCESSED_PIPELINE_REPORT,
pipelineReportFromDatanode);
}

}

private void processPipelineReport(PipelineReport report, DatanodeDetails dn)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ public StorageContainerManager(OzoneConfiguration conf,
NodeReportHandler nodeReportHandler =
new NodeReportHandler(scmNodeManager);
PipelineReportHandler pipelineReportHandler =
new PipelineReportHandler(pipelineManager, conf);
new PipelineReportHandler(scmChillModeManager, pipelineManager, conf);
CommandStatusReportHandler cmdStatusReportHandler =
new CommandStatusReportHandler();

Expand Down
Loading

0 comments on commit 6c8c422

Please sign in to comment.