HDFS-16373. Fix MiniDFSCluster restart in case of multiple namenodes. (#3756)

Reviewed-by: Viraj Jasani <vjasani@apache.org>
Reviewed-by: litao <tomleescut@gmail.com>
Signed-off-by: Takanobu Asanuma <tasanuma@apache.org>
This commit is contained in:
Ayush Saxena 2021-12-14 13:51:51 +05:30 committed by GitHub
parent c56a07f36b
commit d29f0e83a9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 30 additions and 21 deletions

View File

@@ -2267,9 +2267,11 @@ public synchronized void restartNameNode(int nnIndex, boolean waitActive,
info.nameNode = nn;
info.setStartOpt(startOpt);
if (waitActive) {
waitClusterUp();
if (numDataNodes > 0) {
waitNameNodeUp(nnIndex);
}
LOG.info("Restarted the namenode");
waitActive();
waitActive(nnIndex);
}
}
@@ -2775,11 +2777,25 @@ public void waitActive(int nnIndex) throws IOException {
DFSClient client = new DFSClient(addr, conf);
// ensure all datanodes have registered and sent heartbeat to the namenode
while (shouldWait(client.datanodeReport(DatanodeReportType.LIVE), addr)) {
int failedCount = 0;
while (true) {
try {
LOG.info("Waiting for cluster to become active");
Thread.sleep(100);
while (shouldWait(client.datanodeReport(DatanodeReportType.LIVE), addr)) {
LOG.info("Waiting for cluster to become active");
Thread.sleep(100);
}
break;
} catch (IOException e) {
failedCount++;
// Cached RPC connection to namenode, if any, is expected to fail once
if (failedCount > 1) {
LOG.warn("Tried waitActive() " + failedCount
+ " time(s) and failed, giving up. " + StringUtils
.stringifyException(e));
throw e;
}
} catch (InterruptedException e) {
throw new IOException(e);
}
}
@@ -2815,22 +2831,7 @@ public Boolean get() {
*/
public void waitActive() throws IOException {
for (int index = 0; index < namenodes.size(); index++) {
int failedCount = 0;
while (true) {
try {
waitActive(index);
break;
} catch (IOException e) {
failedCount++;
// Cached RPC connection to namenode, if any, is expected to fail once
if (failedCount > 1) {
LOG.warn("Tried waitActive() " + failedCount
+ " time(s) and failed, giving up. "
+ StringUtils.stringifyException(e));
throw e;
}
}
}
waitActive(index);
}
LOG.info("Cluster is active");
}

View File

@@ -309,6 +309,14 @@ public void testSetUpFederatedCluster() throws Exception {
DFSUtil.addKeySuffixes(
DFS_NAMENODE_HTTP_ADDRESS_KEY, "ns1", "nn1")));
}
// Shutdown namenodes individually.
cluster.shutdownNameNode(0);
cluster.shutdownNameNode(1);
// Restart namenodes individually with wait active, both should be successful.
cluster.restartNameNode(0);
cluster.restartNameNode(1);
}
}
}