Skip to content

Commit c255feb

Browse files
authored
HDFS-16061. DFSTestUtil.waitReplication can produce false positives (apache#3095). Contributed by Ahmed Hussein.
Reviewed-by: Jim Brennan <jbrennan@apache.org> Signed-off-by: Ayush Saxena <ayushsaxena@apache.org>
1 parent c491f81 commit c255feb

File tree

2 files changed

+35
-24
lines changed

2 files changed

+35
-24
lines changed

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java

Lines changed: 29 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -794,41 +794,48 @@ public String[] getFileNames(String topDir) {
794794

795795
/**
796796
* Wait for the given file to reach the given replication factor.
797-
* @throws TimeoutException if we fail to sufficiently replicate the file
797+
*
798+
* @param fs the defined filesystem.
799+
* @param fileName path of the file whose block replication is checked.
800+
* @param replFactor desired replication
801+
* @throws IOException if getting the block locations fails
802+
* @throws InterruptedException if interrupted while sleeping between attempts
803+
* @throws TimeoutException if the desired replication is not reached
804+
* after 40 attempts made one second apart.
798805
*/
799-
public static void waitReplication(FileSystem fs, Path fileName, short replFactor)
806+
public static void waitReplication(FileSystem fs, Path fileName,
807+
short replFactor)
800808
throws IOException, InterruptedException, TimeoutException {
801809
boolean correctReplFactor;
802-
final int ATTEMPTS = 40;
803-
int count = 0;
804-
810+
int attempt = 0;
805811
do {
806812
correctReplFactor = true;
813+
if (attempt++ > 0) {
814+
Thread.sleep(1000);
815+
}
807816
BlockLocation locs[] = fs.getFileBlockLocations(
808-
fs.getFileStatus(fileName), 0, Long.MAX_VALUE);
809-
count++;
810-
for (int j = 0; j < locs.length; j++) {
811-
String[] hostnames = locs[j].getNames();
817+
fs.getFileStatus(fileName), 0, Long.MAX_VALUE);
818+
for (int currLoc = 0; currLoc < locs.length; currLoc++) {
819+
String[] hostnames = locs[currLoc].getNames();
812820
if (hostnames.length != replFactor) {
821+
LOG.info(
822+
"Block {} of file {} has replication factor {} "
823+
+ "(desired {}); locations: {}",
824+
currLoc, fileName, hostnames.length, replFactor,
825+
Joiner.on(' ').join(hostnames));
813826
correctReplFactor = false;
814-
System.out.println("Block " + j + " of file " + fileName
815-
+ " has replication factor " + hostnames.length
816-
+ " (desired " + replFactor + "); locations "
817-
+ Joiner.on(' ').join(hostnames));
818-
Thread.sleep(1000);
819827
break;
820828
}
821829
}
822-
if (correctReplFactor) {
823-
System.out.println("All blocks of file " + fileName
824-
+ " verified to have replication factor " + replFactor);
825-
}
826-
} while (!correctReplFactor && count < ATTEMPTS);
830+
} while (!correctReplFactor && attempt < 40);
827831

828-
if (count == ATTEMPTS) {
829-
throw new TimeoutException("Timed out waiting for " + fileName +
830-
" to reach " + replFactor + " replicas");
832+
if (!correctReplFactor) {
833+
throw new TimeoutException("Timed out waiting for file ["
834+
+ fileName + "] to reach [" + replFactor + "] replicas");
831835
}
836+
837+
LOG.info("All blocks of file {} verified to have replication factor {}",
838+
fileName, replFactor);
832839
}
833840

834841
/** delete directory and everything underneath it.*/

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerRPCDelay.java

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,13 +20,17 @@
2020
import org.apache.hadoop.hdfs.DFSConfigKeys;
2121
import org.junit.After;
2222
import org.junit.Before;
23+
import org.junit.Rule;
2324
import org.junit.Test;
25+
import org.junit.rules.Timeout;
2426

2527
/**
2628
* The Balancer ensures that it disperses RPCs to the NameNode
2729
* in order to avoid NN's RPC queue saturation.
2830
*/
2931
public class TestBalancerRPCDelay {
32+
@Rule
33+
public Timeout globalTimeout = Timeout.seconds(100);
3034

3135
private TestBalancer testBalancer;
3236

@@ -43,12 +47,12 @@ public void teardown() throws Exception {
4347
}
4448
}
4549

46-
@Test(timeout=100000)
50+
@Test
4751
public void testBalancerRPCDelayQps3() throws Exception {
4852
testBalancer.testBalancerRPCDelay(3);
4953
}
5054

51-
@Test(timeout=100000)
55+
@Test
5256
public void testBalancerRPCDelayQpsDefault() throws Exception {
5357
testBalancer.testBalancerRPCDelay(
5458
DFSConfigKeys.DFS_NAMENODE_GETBLOCKS_MAX_QPS_DEFAULT);

0 commit comments

Comments
 (0)