Skip to content

Commit d34f6b6

Browse files
committed
HDFS-15925. The lack of packet-level mirrorError state synchronization in BlockReceiver can cause the HDFS client to hang
1 parent f5c1557 commit d34f6b6

File tree

1 file changed

+17
-4
lines changed
  • hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode

1 file changed

+17
-4
lines changed

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import java.util.ArrayDeque;
3333
import java.util.Arrays;
3434
import java.util.Queue;
35+
import java.util.concurrent.Semaphore;
3536
import java.util.concurrent.atomic.AtomicLong;
3637
import java.util.zip.Checksum;
3738

@@ -532,7 +533,7 @@ private boolean shouldVerifyChecksum() {
532533
* Receives and processes a packet. It can contain many chunks.
533534
* returns the number of data bytes that the packet has.
534535
*/
535-
private int receivePacket() throws IOException {
536+
private int receivePacket(final Semaphore ackSema) throws IOException {
536537
// read the next packet
537538
packetReceiver.receiveNextPacket(in);
538539

@@ -616,6 +617,9 @@ private int receivePacket() throws IOException {
616617
handleMirrorOutError(e);
617618
}
618619
}
620+
if (ackSema != null) {
621+
ackSema.release();
622+
}
619623

620624
ByteBuffer dataBuf = packetReceiver.getDataSlice();
621625
ByteBuffer checksumBuf = packetReceiver.getChecksumSlice();
@@ -984,13 +988,15 @@ void receiveBlock(
984988
this.isReplaceBlock = isReplaceBlock;
985989

986990
try {
991+
Semaphore ackSema = null;
987992
if (isClient && !isTransfer) {
993+
ackSema = new Semaphore(0);
988994
responder = new Daemon(datanode.threadGroup,
989-
new PacketResponder(replyOut, mirrIn, downstreams));
995+
new PacketResponder(replyOut, mirrIn, downstreams, ackSema));
990996
responder.start(); // start thread to process responses
991997
}
992998

993-
while (receivePacket() >= 0) { /* Receive until the last packet */ }
999+
while (receivePacket(ackSema) >= 0) { /* Receive until the last packet */ }
9941000

9951001
// wait for all outstanding packet responses, and then
9961002
// signal the responder to shut down gracefully.
@@ -1246,16 +1252,20 @@ class PacketResponder implements Runnable, Closeable {
12461252
/** for log and error messages */
12471253
private final String myString;
12481254
private boolean sending = false;
1255+
/** for synchronization with BlockReceiver */
1256+
private final Semaphore ackSema;
12491257

12501258
@Override
12511259
public String toString() {
12521260
return myString;
12531261
}
12541262

12551263
PacketResponder(final DataOutputStream upstreamOut,
1256-
final DataInputStream downstreamIn, final DatanodeInfo[] downstreams) {
1264+
final DataInputStream downstreamIn, final DatanodeInfo[] downstreams,
1265+
final Semaphore ackSema) {
12571266
this.downstreamIn = downstreamIn;
12581267
this.upstreamOut = upstreamOut;
1268+
this.ackSema = ackSema;
12591269

12601270
this.type = downstreams == null? PacketResponderType.NON_PIPELINE
12611271
: downstreams.length == 0? PacketResponderType.LAST_IN_PIPELINE
@@ -1395,6 +1405,9 @@ public void run() {
13951405
long seqno = PipelineAck.UNKOWN_SEQNO;
13961406
long ackRecvNanoTime = 0;
13971407
try {
1408+
if (ackSema != null) {
1409+
ackSema.acquire();
1410+
}
13981411
if (type != PacketResponderType.LAST_IN_PIPELINE && !mirrorError) {
13991412
DataNodeFaultInjector.get().failPipeline(replicaInfo, mirrorAddr);
14001413
// read an ack from downstream datanode

0 commit comments

Comments
 (0)