From d29f0e83a935a0443537064d1e7290ea67301045 Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Tue, 14 Dec 2021 13:51:51 +0530 Subject: [PATCH] HDFS-16373. Fix MiniDFSCluster restart in case of multiple namenodes. (#3756) Reviewed-by: Viraj Jasani Reviewed-by: litao Signed-off-by: Takanobu Asanuma --- .../apache/hadoop/hdfs/MiniDFSCluster.java | 43 ++++++++++--------- .../hadoop/hdfs/TestMiniDFSCluster.java | 8 ++++ 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index e4b6434b48..703111ed2f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -2267,9 +2267,11 @@ public synchronized void restartNameNode(int nnIndex, boolean waitActive, info.nameNode = nn; info.setStartOpt(startOpt); if (waitActive) { - waitClusterUp(); + if (numDataNodes > 0) { + waitNameNodeUp(nnIndex); + } LOG.info("Restarted the namenode"); - waitActive(); + waitActive(nnIndex); } } @@ -2775,11 +2777,25 @@ public void waitActive(int nnIndex) throws IOException { DFSClient client = new DFSClient(addr, conf); // ensure all datanodes have registered and sent heartbeat to the namenode - while (shouldWait(client.datanodeReport(DatanodeReportType.LIVE), addr)) { + int failedCount = 0; + while (true) { try { - LOG.info("Waiting for cluster to become active"); - Thread.sleep(100); + while (shouldWait(client.datanodeReport(DatanodeReportType.LIVE), addr)) { + LOG.info("Waiting for cluster to become active"); + Thread.sleep(100); + } + break; + } catch (IOException e) { + failedCount++; + // Cached RPC connection to namenode, if any, is expected to fail once + if (failedCount > 1) { + LOG.warn("Tried waitActive() " + failedCount + + " time(s) and failed, giving up. " + StringUtils + .stringifyException(e)); + throw e; + } } catch (InterruptedException e) { + throw new IOException(e); } } @@ -2815,22 +2831,7 @@ public Boolean get() { */ public void waitActive() throws IOException { for (int index = 0; index < namenodes.size(); index++) { - int failedCount = 0; - while (true) { - try { - waitActive(index); - break; - } catch (IOException e) { - failedCount++; - // Cached RPC connection to namenode, if any, is expected to fail once - if (failedCount > 1) { - LOG.warn("Tried waitActive() " + failedCount - + " time(s) and failed, giving up. " - + StringUtils.stringifyException(e)); - throw e; - } - } - } + waitActive(index); } LOG.info("Cluster is active"); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java index 74cfe9d673..737795b88d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java @@ -309,6 +309,14 @@ public void testSetUpFederatedCluster() throws Exception { DFSUtil.addKeySuffixes( DFS_NAMENODE_HTTP_ADDRESS_KEY, "ns1", "nn1"))); } + + // Shutdown namenodes individually. + cluster.shutdownNameNode(0); + cluster.shutdownNameNode(1); + + // Restart namenodes individually with wait active, both should be successful. + cluster.restartNameNode(0); + cluster.restartNameNode(1); } } }