From 92ade1f6f9fa7591742307571a7b7be19576b678 Mon Sep 17 00:00:00 2001
From: Ahmed Hussein <50450311+amahussein@users.noreply.github.com>
Date: Sat, 19 Jun 2021 23:52:47 -0500
Subject: [PATCH] HDFS-16061. DFTestUtil.waitReplication can produce false
 positives (#3095). Contributed by Ahmed Hussein.

Reviewed-by: Jim Brennan
Signed-off-by: Ayush Saxena
---
 .../org/apache/hadoop/hdfs/DFSTestUtil.java        | 51 +++++++++++--------
 .../server/balancer/TestBalancerRPCDelay.java      |  8 ++-
 2 files changed, 35 insertions(+), 24 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java
index 02dcb6d49a..e9a732f1a8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java
@@ -799,41 +799,48 @@ public String[] getFileNames(String topDir) {
 
   /**
    * Wait for the given file to reach the given replication factor.
-   * @throws TimeoutException if we fail to sufficiently replicate the file
+   *
+   * @param fs the defined filesystem.
+   * @param fileName being written.
+   * @param replFactor desired replication
+   * @throws IOException getting block locations
+   * @throws InterruptedException during sleep
+   * @throws TimeoutException if 40 seconds passed before reaching the desired
+   *         replication.
    */
-  public static void waitReplication(FileSystem fs, Path fileName, short replFactor)
+  public static void waitReplication(FileSystem fs, Path fileName,
+      short replFactor)
       throws IOException, InterruptedException, TimeoutException {
     boolean correctReplFactor;
-    final int ATTEMPTS = 40;
-    int count = 0;
-
+    int attempt = 0;
     do {
       correctReplFactor = true;
+      if (attempt++ > 0) {
+        Thread.sleep(1000);
+      }
       BlockLocation locs[] = fs.getFileBlockLocations(
-        fs.getFileStatus(fileName), 0, Long.MAX_VALUE);
-      count++;
-      for (int j = 0; j < locs.length; j++) {
-        String[] hostnames = locs[j].getNames();
+          fs.getFileStatus(fileName), 0, Long.MAX_VALUE);
+      for (int currLoc = 0; currLoc < locs.length; currLoc++) {
+        String[] hostnames = locs[currLoc].getNames();
         if (hostnames.length != replFactor) {
+          LOG.info(
+              "Block {} of file {} has replication factor {} "
+                  + "(desired {}); locations: {}",
+              currLoc, fileName, hostnames.length, replFactor,
+              Joiner.on(' ').join(hostnames));
           correctReplFactor = false;
-          System.out.println("Block " + j + " of file " + fileName
-              + " has replication factor " + hostnames.length
-              + " (desired " + replFactor + "); locations "
-              + Joiner.on(' ').join(hostnames));
-          Thread.sleep(1000);
           break;
         }
      }
-      if (correctReplFactor) {
-        System.out.println("All blocks of file " + fileName
-            + " verified to have replication factor " + replFactor);
-      }
-    } while (!correctReplFactor && count < ATTEMPTS);
+    } while (!correctReplFactor && attempt < 40);
 
-    if (count == ATTEMPTS) {
-      throw new TimeoutException("Timed out waiting for " + fileName +
-          " to reach " + replFactor + " replicas");
+    if (!correctReplFactor) {
+      throw new TimeoutException("Timed out waiting for file ["
+          + fileName + "] to reach [" + replFactor + "] replicas");
     }
+
+    LOG.info("All blocks of file {} verified to have replication factor {}",
+        fileName, replFactor);
   }
 
   /** delete directory and everything underneath it.*/
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerRPCDelay.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerRPCDelay.java
index 79c7f87d4e..9752d65982 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerRPCDelay.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerRPCDelay.java
@@ -20,13 +20,17 @@
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.junit.After;
 import org.junit.Before;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.Timeout;
 
 /**
  * The Balancer ensures that it disperses RPCs to the NameNode
  * in order to avoid NN's RPC queue saturation.
  */
 public class TestBalancerRPCDelay {
+  @Rule
+  public Timeout globalTimeout = Timeout.seconds(100);
 
   private TestBalancer testBalancer;
 
@@ -43,12 +47,12 @@ public void teardown() throws Exception {
     }
   }
 
-  @Test(timeout=100000)
+  @Test
   public void testBalancerRPCDelayQps3() throws Exception {
     testBalancer.testBalancerRPCDelay(3);
   }
 
-  @Test(timeout=100000)
+  @Test
   public void testBalancerRPCDelayQpsDefault() throws Exception {
     testBalancer.testBalancerRPCDelay(
         DFSConfigKeys.DFS_NAMENODE_GETBLOCKS_MAX_QPS_DEFAULT);
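For anyone wanting to exercise the reworked helper locally, the minimal sketch below shows how it is typically driven from a test. The class name, file path, sizes, and MiniDFSCluster setup here are illustrative assumptions and are not part of this patch; only the waitReplication call reflects the changed code.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;

public class WaitReplicationExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new HdfsConfiguration();
    // Three datanodes so a replication factor of 3 can actually be reached.
    MiniDFSCluster cluster =
        new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    try {
      cluster.waitActive();
      FileSystem fs = cluster.getFileSystem();
      Path file = new Path("/waitReplicationExample/file1");
      short replFactor = 3;
      // Write a 1 KB file with the desired replication factor.
      DFSTestUtil.createFile(fs, file, 1024L, replFactor, 0L);
      // Polls once per second for up to 40 attempts; with this patch it
      // throws TimeoutException only when the factor is still wrong after
      // the final check, and reports progress through the class logger.
      DFSTestUtil.waitReplication(fs, file, replFactor);
    } finally {
      cluster.shutdown();
    }
  }
}

On the test-timeout side, replacing the per-method @Test(timeout=100000) annotations with a class-level Timeout rule keeps the same 100-second budget per test while also covering the @Before and @After fixture methods, which the per-method timeout does not.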