YARN-11467. RM failover may fail when the nodes.exclude-path file does not exist (#5565)

This commit is contained in:
cxzl25 2023-05-10 15:16:33 +08:00 committed by GitHub
parent d95b5c679d
commit be50d221f5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 108 additions and 1 deletion

View File

@ -220,7 +220,11 @@ private void printConfiguredHosts(boolean graceful) {
/**
 * Refreshes the node lists from the given configuration in non-graceful mode.
 *
 * <p>Delegates to {@code refreshNodes(yarnConf, false)}. A failure of that
 * refresh is not propagated; it is handed to {@code disableHostsFileReader}
 * so that, per the change description, a missing
 * {@code nodes.exclude-path} file does not fail RM failover.
 *
 * @param yarnConf configuration passed through to the two-argument overload
 * @throws IOException declared for interface compatibility with existing callers
 * @throws YarnException declared for interface compatibility with existing callers
 */
public void refreshNodes(Configuration yarnConf)
    throws IOException, YarnException {
  try {
    // Single, guarded refresh: an unguarded call ahead of this try would let
    // the exception escape before the fallback below could run, and would
    // refresh twice on success.
    refreshNodes(yarnConf, false);
  } catch (YarnException | IOException ex) {
    // NOTE(review): presumably falls back to a default/empty hosts reader;
    // confirm against disableHostsFileReader's implementation.
    disableHostsFileReader(ex);
  }
}
public void refreshNodes(Configuration yarnConf, boolean graceful)

View File

@ -18,6 +18,10 @@
package org.apache.hadoop.yarn.server.resourcemanager;
import java.io.DataOutputStream;
import java.io.File;
import java.nio.file.Files;
import java.util.UUID;
import java.util.function.Supplier;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
@ -742,6 +746,105 @@ public void testResourceProfilesManagerAfterRMWentStandbyThenBackToActive()
rm.getRMContext().getResourceProfilesManager());
}
/**
 * Checks that the RM can be moved between standby and active while
 * {@code RM_NODES_EXCLUDE_FILE_PATH} points at a file that does not exist.
 *
 * <p>The test writes its configuration to the yarn-site file under
 * {@code target/test-classes/}. Any pre-existing file is moved to a uniquely
 * named backup first and is put back in the outer {@code finally}, so the
 * original file is restored even when setup itself fails part-way.
 */
@Test
public void testTransitionedToActiveWithExcludeFileNotExist() throws Exception {
  final String errUnforcedRequest = "User request succeeded even when " +
      "automatic failover is enabled";
  final String errForcedRequest =
      "Forced request by user should work " + "even if automatic failover is enabled";

  Configuration conf = new YarnConfiguration(configuration);
  // Random suffix: the exclude path is not expected to exist on disk.
  String nodeExcludeFilePath = "/tmp/non-existent-path-" + UUID.randomUUID();
  conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, nodeExcludeFilePath);

  final File confFile = new File(
      "target/test-classes/" + YarnConfiguration.YARN_SITE_CONFIGURATION_FILE);
  final File backupConfFile = new File(
      "target/test-classes/" + YarnConfiguration.YARN_SITE_CONFIGURATION_FILE
          + ".backup." + UUID.randomUUID());

  // Track exactly what this test changed on disk so cleanup never touches
  // a file it did not create or move.
  boolean hasRenamed = false;
  boolean hasCreated = false;
  try {
    if (confFile.exists()) {
      hasRenamed = confFile.renameTo(backupConfFile);
      if (!hasRenamed) {
        fail("Can not rename " + confFile.getAbsolutePath() + " to "
            + backupConfFile.getAbsolutePath());
      }
    }
    hasCreated = confFile.createNewFile();
    if (!hasCreated) {
      fail("Can not create " + YarnConfiguration.YARN_SITE_CONFIGURATION_FILE);
    }
    try (DataOutputStream output =
        new DataOutputStream(Files.newOutputStream(confFile.toPath()))) {
      conf.writeXml(output);
    }

    rm = new MockRM(conf);
    rm.init(conf);
    rm.start();

    StateChangeRequestInfo requestInfo = new StateChangeRequestInfo(
        HAServiceProtocol.RequestSource.REQUEST_BY_USER);

    // Unforced transition to standby must be rejected.
    try {
      rm.adminService.transitionToStandby(requestInfo);
      fail(errUnforcedRequest);
    } catch (AccessControlException e) {
      // expected
    }
    checkMonitorHealth();
    checkStandbyRMFunctionality();

    // Unforced transition to active must be rejected.
    try {
      rm.adminService.transitionToActive(requestInfo);
      fail(errUnforcedRequest);
    } catch (AccessControlException e) {
      // expected
    }
    checkMonitorHealth();
    checkStandbyRMFunctionality();

    requestInfo = new StateChangeRequestInfo(
        HAServiceProtocol.RequestSource.REQUEST_BY_USER_FORCED);

    // Forced transition to standby must be accepted.
    try {
      rm.adminService.transitionToStandby(requestInfo);
    } catch (AccessControlException e) {
      fail(errForcedRequest);
    }
    checkMonitorHealth();
    checkStandbyRMFunctionality();

    // Forced transition to active must be accepted even though the
    // configured exclude file is missing.
    try {
      rm.adminService.transitionToActive(requestInfo);
    } catch (AccessControlException e) {
      fail(errForcedRequest);
    }
    checkMonitorHealth();
    checkActiveRMFunctionality();
  } finally {
    try {
      if (hasRenamed) {
        // Files.move with REPLACE_EXISTING overwrites the generated file on
        // every platform; File.renameTo onto an existing target is
        // platform-dependent and only reports failure via its return value.
        Files.move(backupConfFile.toPath(), confFile.toPath(),
            java.nio.file.StandardCopyOption.REPLACE_EXISTING);
      } else if (hasCreated) {
        Files.deleteIfExists(confFile.toPath());
      }
    } finally {
      // Stop the RM even if restoring the configuration file failed.
      if (rm != null) {
        rm.stop();
      }
    }
  }
}
public void innerTestHAWithRMHostName(boolean includeBindHost) {
//this is run two times, with and without a bind host configured
if (includeBindHost) {