HADOOP-13933. Add haadmin -getAllServiceState option to get the HA state of all the NameNodes/ResourceManagers. Contributed by Surendra Singh Lilhore.
This commit is contained in:
parent
cf695577aa
commit
e407449ddb
@ -80,6 +80,8 @@ public abstract class HAAdmin extends Configured implements Tool {
|
||||
"--" + FORCEACTIVE + " option is used."))
|
||||
.put("-getServiceState",
|
||||
new UsageInfo("<serviceId>", "Returns the state of the service"))
|
||||
.put("-getAllServiceState",
|
||||
new UsageInfo(null, "Returns the state of all the services"))
|
||||
.put("-checkHealth",
|
||||
new UsageInfo("<serviceId>",
|
||||
"Requests that the service perform a health check.\n" +
|
||||
@ -119,7 +121,11 @@ public abstract class HAAdmin extends Configured implements Tool {
|
||||
String cmd = e.getKey();
|
||||
UsageInfo usage = e.getValue();
|
||||
|
||||
errOut.println(" [" + cmd + " " + usage.args + "]");
|
||||
if (usage.args == null) {
|
||||
errOut.println(" [" + cmd + "]");
|
||||
} else {
|
||||
errOut.println(" [" + cmd + " " + usage.args + "]");
|
||||
}
|
||||
}
|
||||
errOut.println();
|
||||
ToolRunner.printGenericCommandUsage(errOut);
|
||||
@ -130,7 +136,11 @@ public abstract class HAAdmin extends Configured implements Tool {
|
||||
if (usage == null) {
|
||||
throw new RuntimeException("No usage for cmd " + cmd);
|
||||
}
|
||||
errOut.println(getUsageString() + " [" + cmd + " " + usage.args + "]");
|
||||
if (usage.args == null) {
|
||||
errOut.println(getUsageString() + " [" + cmd + "]");
|
||||
} else {
|
||||
errOut.println(getUsageString() + " [" + cmd + " " + usage.args + "]");
|
||||
}
|
||||
}
|
||||
|
||||
private int transitionToActive(final CommandLine cmd)
|
||||
@ -455,6 +465,8 @@ public abstract class HAAdmin extends Configured implements Tool {
|
||||
return failover(cmdLine);
|
||||
} else if ("-getServiceState".equals(cmd)) {
|
||||
return getServiceState(cmdLine);
|
||||
} else if ("-getAllServiceState".equals(cmd)) {
|
||||
return getAllServiceState();
|
||||
} else if ("-checkHealth".equals(cmd)) {
|
||||
return checkHealth(cmdLine);
|
||||
} else if ("-help".equals(cmd)) {
|
||||
@ -465,7 +477,30 @@ public abstract class HAAdmin extends Configured implements Tool {
|
||||
throw new AssertionError("Should not get here, command: " + cmd);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
protected int getAllServiceState() {
|
||||
Collection<String> targetIds = getTargetIds(null);
|
||||
if (targetIds.isEmpty()) {
|
||||
errOut.println("Failed to get service IDs");
|
||||
return -1;
|
||||
}
|
||||
for (String targetId : targetIds) {
|
||||
HAServiceTarget target = resolveTarget(targetId);
|
||||
String address = target.getAddress().getHostName() + ":"
|
||||
+ target.getAddress().getPort();
|
||||
try {
|
||||
HAServiceProtocol proto = target.getProxy(getConf(),
|
||||
rpcTimeoutForChecks);
|
||||
out.println(String.format("%-50s %-10s", address, proto
|
||||
.getServiceStatus().getState()));
|
||||
} catch (IOException e) {
|
||||
out.println(String.format("%-50s %-10s", address,
|
||||
"Failed to connect: " + e.getMessage()));
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
private boolean confirmForceManual() throws IOException {
|
||||
return ToolRunner.confirmPrompt(
|
||||
"You have specified the --" + FORCEMANUAL + " flag. This flag is " +
|
||||
@ -532,7 +567,11 @@ public abstract class HAAdmin extends Configured implements Tool {
|
||||
return -1;
|
||||
}
|
||||
|
||||
out.println(cmd + " [" + usageInfo.args + "]: " + usageInfo.help);
|
||||
if (usageInfo.args == null) {
|
||||
out.println(cmd + ": " + usageInfo.help);
|
||||
} else {
|
||||
out.println(cmd + " [" + usageInfo.args + "]: " + usageInfo.help);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -486,6 +486,7 @@ Usage:
|
||||
hdfs haadmin -transitionToStandby <serviceId>
|
||||
hdfs haadmin -failover [--forcefence] [--forceactive] <serviceId> <serviceId>
|
||||
hdfs haadmin -getServiceState <serviceId>
|
||||
hdfs haadmin -getAllServiceState
|
||||
hdfs haadmin -checkHealth <serviceId>
|
||||
hdfs haadmin -help <command>
|
||||
|
||||
@ -495,6 +496,7 @@ Usage:
|
||||
| `-checkHealth` | check the health of the given NameNode |
|
||||
| `-failover` | initiate a failover between two NameNodes |
|
||||
| `-getServiceState` | determine whether the given NameNode is Active or Standby |
|
||||
| `-getAllServiceState` | returns the state of all the NameNodes |
|
||||
| `-transitionToActive` | transition the state of the given NameNode to Active (Warning: No fencing is done) |
|
||||
| `-transitionToStandby` | transition the state of the given NameNode to Standby (Warning: No fencing is done) |
|
||||
| `-help` [cmd] | Displays help for the given command or all commands if none is specified. |
|
||||
|
@ -350,6 +350,7 @@ Now that your HA NameNodes are configured and started, you will have access to s
|
||||
[-transitionToStandby <serviceId>]
|
||||
[-failover [--forcefence] [--forceactive] <serviceId> <serviceId>]
|
||||
[-getServiceState <serviceId>]
|
||||
[-getAllServiceState]
|
||||
[-checkHealth <serviceId>]
|
||||
[-help <command>]
|
||||
|
||||
@ -381,6 +382,11 @@ This guide describes high-level uses of each of these subcommands. For specific
|
||||
used by cron jobs or monitoring scripts which need to behave differently based
|
||||
on whether the NameNode is currently Active or Standby.
|
||||
|
||||
* **getAllServiceState** - returns the state of all the NameNodes
|
||||
|
||||
Connect to each configured NameNode to determine its current state, and print
|
||||
the NameNode's address followed by either "standby" or "active" to STDOUT.
|
||||
|
||||
* **checkHealth** - check the health of the given NameNode
|
||||
|
||||
Connect to the provided NameNode to check its health. The NameNode is capable
|
||||
|
@ -399,6 +399,7 @@ Now that your HA NameNodes are configured and started, you will have access to s
|
||||
[-transitionToStandby <serviceId>]
|
||||
[-failover [--forcefence] [--forceactive] <serviceId> <serviceId>]
|
||||
[-getServiceState <serviceId>]
|
||||
[-getAllServiceState]
|
||||
[-checkHealth <serviceId>]
|
||||
[-help <command>]
|
||||
|
||||
@ -430,6 +431,11 @@ This guide describes high-level uses of each of these subcommands. For specific
|
||||
used by cron jobs or monitoring scripts which need to behave differently based
|
||||
on whether the NameNode is currently Active or Standby.
|
||||
|
||||
* **getAllServiceState** - returns the state of all the NameNodes
|
||||
|
||||
Connect to each configured NameNode to determine its current state, and print
|
||||
the NameNode's address followed by either "standby" or "active" to STDOUT.
|
||||
|
||||
* **checkHealth** - check the health of the given NameNode
|
||||
|
||||
Connect to the provided NameNode to check its health. The NameNode is capable
|
||||
|
@ -173,7 +173,18 @@ public class TestDFSHAAdmin {
|
||||
assertEquals(0, runTool("-help", "transitionToActive"));
|
||||
assertOutputContains("Transitions the service into Active");
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testGetAllServiceState() throws Exception {
|
||||
Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol)
|
||||
.getServiceStatus();
|
||||
assertEquals(0, runTool("-getAllServiceState"));
|
||||
assertOutputContains(String.format("%-50s %-10s", (HOST_A + ":" + 12345),
|
||||
STANDBY_READY_RESULT.getState()));
|
||||
assertOutputContains(String.format("%-50s %-10s", (HOST_B + ":" + 12345),
|
||||
STANDBY_READY_RESULT.getState()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTransitionToActive() throws Exception {
|
||||
Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus();
|
||||
|
@ -174,6 +174,10 @@ public class RMAdminCLI extends HAAdmin {
|
||||
this.errOut = errOut;
|
||||
}
|
||||
|
||||
protected void setOut(PrintStream out) {
|
||||
this.out = out;
|
||||
}
|
||||
|
||||
private static void appendHAUsage(final StringBuilder usageBuilder) {
|
||||
for (Map.Entry<String,UsageInfo> cmdEntry : USAGE.entrySet()) {
|
||||
if (cmdEntry.getKey().equals("-help")
|
||||
@ -181,7 +185,12 @@ public class RMAdminCLI extends HAAdmin {
|
||||
continue;
|
||||
}
|
||||
UsageInfo usageInfo = cmdEntry.getValue();
|
||||
usageBuilder.append(" [" + cmdEntry.getKey() + " " + usageInfo.args + "]");
|
||||
if (usageInfo.args == null) {
|
||||
usageBuilder.append(" [" + cmdEntry.getKey() + "]");
|
||||
} else {
|
||||
usageBuilder.append(" [" + cmdEntry.getKey() + " " + usageInfo.args
|
||||
+ "]");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -193,9 +202,13 @@ public class RMAdminCLI extends HAAdmin {
|
||||
return;
|
||||
}
|
||||
}
|
||||
String space = (usageInfo.args == "") ? "" : " ";
|
||||
builder.append(" " + cmd + space + usageInfo.args + ": " +
|
||||
usageInfo.help);
|
||||
if (usageInfo.args == null) {
|
||||
builder.append(" " + cmd + ": " + usageInfo.help);
|
||||
} else {
|
||||
String space = (usageInfo.args == "") ? "" : " ";
|
||||
builder.append(" " + cmd + space + usageInfo.args + ": "
|
||||
+ usageInfo.help);
|
||||
}
|
||||
}
|
||||
|
||||
private static void buildIndividualUsageMsg(String cmd,
|
||||
@ -209,10 +222,13 @@ public class RMAdminCLI extends HAAdmin {
|
||||
}
|
||||
isHACommand = true;
|
||||
}
|
||||
String space = (usageInfo.args == "") ? "" : " ";
|
||||
builder.append("Usage: yarn rmadmin ["
|
||||
+ cmd + space + usageInfo.args
|
||||
+ "]\n");
|
||||
if (usageInfo.args == null) {
|
||||
builder.append("Usage: yarn rmadmin [" + cmd + "]\n");
|
||||
} else {
|
||||
String space = (usageInfo.args == "") ? "" : " ";
|
||||
builder.append("Usage: yarn rmadmin [" + cmd + space + usageInfo.args
|
||||
+ "]\n");
|
||||
}
|
||||
if (isHACommand) {
|
||||
builder.append(cmd + " can only be used when RM HA is enabled");
|
||||
}
|
||||
@ -230,7 +246,11 @@ public class RMAdminCLI extends HAAdmin {
|
||||
String cmdKey = cmdEntry.getKey();
|
||||
if (!cmdKey.equals("-help")) {
|
||||
UsageInfo usageInfo = cmdEntry.getValue();
|
||||
builder.append(" " + cmdKey + " " + usageInfo.args + "\n");
|
||||
if (usageInfo.args == null) {
|
||||
builder.append(" " + cmdKey + "\n");
|
||||
} else {
|
||||
builder.append(" " + cmdKey + " " + usageInfo.args + "\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -45,11 +45,13 @@ import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.ha.HAServiceProtocol;
|
||||
import org.apache.hadoop.ha.HAServiceStatus;
|
||||
import org.apache.hadoop.ha.HAServiceTarget;
|
||||
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
|
||||
import org.apache.hadoop.service.Service.STATE;
|
||||
import org.apache.hadoop.yarn.api.records.DecommissionType;
|
||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceOption;
|
||||
import org.apache.hadoop.yarn.conf.HAUtil;
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||
import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
|
||||
@ -67,13 +69,13 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshServiceAclsReque
|
||||
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshSuperUserGroupsConfigurationRequest;
|
||||
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshUserToGroupsMappingsRequest;
|
||||
import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest;
|
||||
import org.apache.hadoop.yarn.util.ConverterUtils;
|
||||
import org.apache.hadoop.yarn.util.Records;
|
||||
import org.apache.hadoop.yarn.util.resource.Resources;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.mockito.ArgumentCaptor;
|
||||
import org.mockito.ArgumentMatcher;
|
||||
import org.mockito.Mockito;
|
||||
import org.mockito.invocation.InvocationOnMock;
|
||||
import org.mockito.stubbing.Answer;
|
||||
|
||||
@ -88,6 +90,8 @@ public class TestRMAdminCLI {
|
||||
private RMAdminCLI rmAdminCLIWithHAEnabled;
|
||||
private CommonNodeLabelsManager dummyNodeLabelsManager;
|
||||
private boolean remoteAdminServiceAccessed = false;
|
||||
private static final String HOST_A = "1.2.3.1";
|
||||
private static final String HOST_B = "1.2.3.2";
|
||||
|
||||
@SuppressWarnings("static-access")
|
||||
@Before
|
||||
@ -130,6 +134,14 @@ public class TestRMAdminCLI {
|
||||
YarnConfiguration conf = new YarnConfiguration();
|
||||
conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true);
|
||||
conf.set(YarnConfiguration.RM_HA_IDS, "rm1,rm2");
|
||||
conf.set(HAUtil.addSuffix(YarnConfiguration.RM_ADDRESS, "rm1"), HOST_A
|
||||
+ ":12345");
|
||||
conf.set(HAUtil.addSuffix(YarnConfiguration.RM_ADMIN_ADDRESS, "rm1"),
|
||||
HOST_A + ":12346");
|
||||
conf.set(HAUtil.addSuffix(YarnConfiguration.RM_ADDRESS, "rm2"), HOST_B
|
||||
+ ":12345");
|
||||
conf.set(HAUtil.addSuffix(YarnConfiguration.RM_ADMIN_ADDRESS, "rm2"),
|
||||
HOST_B + ":12346");
|
||||
rmAdminCLIWithHAEnabled = new RMAdminCLI(conf) {
|
||||
|
||||
@Override
|
||||
@ -140,7 +152,17 @@ public class TestRMAdminCLI {
|
||||
|
||||
@Override
|
||||
protected HAServiceTarget resolveTarget(String rmId) {
|
||||
return haServiceTarget;
|
||||
HAServiceTarget target = super.resolveTarget(rmId);
|
||||
HAServiceTarget spy = Mockito.spy(target);
|
||||
// Override the target to return our mock protocol
|
||||
try {
|
||||
Mockito.doReturn(haadmin).when(spy)
|
||||
.getProxy(Mockito.<Configuration> any(), Mockito.anyInt());
|
||||
Mockito.doReturn(false).when(spy).isAutoFailoverEnabled();
|
||||
} catch (IOException e) {
|
||||
throw new AssertionError(e); // mock setup doesn't really throw
|
||||
}
|
||||
return spy;
|
||||
}
|
||||
};
|
||||
}
|
||||
@ -424,6 +446,24 @@ public class TestRMAdminCLI {
|
||||
verify(haadmin).getServiceStatus();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetAllServiceState() throws Exception {
|
||||
HAServiceStatus standbyStatus = new HAServiceStatus(
|
||||
HAServiceState.STANDBY).setReadyToBecomeActive();
|
||||
Mockito.doReturn(standbyStatus).when(haadmin).getServiceStatus();
|
||||
ByteArrayOutputStream dataOut = new ByteArrayOutputStream();
|
||||
rmAdminCLIWithHAEnabled.setOut(new PrintStream(dataOut));
|
||||
String[] args = {"-getAllServiceState"};
|
||||
assertEquals(0, rmAdminCLIWithHAEnabled.run(args));
|
||||
assertTrue(dataOut.toString().contains(
|
||||
String.format("%-50s %-10s", (HOST_A + ":" + 12346),
|
||||
standbyStatus.getState())));
|
||||
assertTrue(dataOut.toString().contains(
|
||||
String.format("%-50s %-10s", (HOST_B + ":" + 12346),
|
||||
standbyStatus.getState())));
|
||||
rmAdminCLIWithHAEnabled.setOut(System.out);
|
||||
}
|
||||
|
||||
@Test(timeout = 500)
|
||||
public void testCheckHealth() throws Exception {
|
||||
String[] args = {"-checkHealth", "rm1"};
|
||||
@ -572,7 +612,8 @@ public class TestRMAdminCLI {
|
||||
+ "([OvercommitTimeout]) "
|
||||
+ "[-transitionToActive [--forceactive] <serviceId>] "
|
||||
+ "[-transitionToStandby <serviceId>] "
|
||||
+ "[-getServiceState <serviceId>] [-checkHealth <serviceId>] [-help [cmd]]";
|
||||
+ "[-getServiceState <serviceId>] [-getAllServiceState] "
|
||||
+ "[-checkHealth <serviceId>] [-help [cmd]]";
|
||||
String actualHelpMsg = dataOut.toString();
|
||||
assertTrue(String.format("Help messages: %n " + actualHelpMsg + " %n doesn't include expected " +
|
||||
"messages: %n" + expectedHelpMsg), actualHelpMsg.contains(expectedHelpMsg
|
||||
|
@ -230,6 +230,7 @@ Usage:
|
||||
-transitionToStandby <serviceId>
|
||||
-failover [--forcefence] [--forceactive] <serviceId> <serviceId>
|
||||
-getServiceState <serviceId>
|
||||
-getAllServiceState
|
||||
-checkHealth <serviceId>
|
||||
-help [cmd]
|
||||
```
|
||||
@ -254,6 +255,7 @@ Usage:
|
||||
| -transitionToStandby [--forcemanual] \<serviceId\> | Transitions the service into Standby state. This command can not be used if automatic failover is enabled. Though you can override this by --forcemanual option, you need caution. |
|
||||
| -failover [--forceactive] \<serviceId1\> \<serviceId2\> | Initiate a failover from serviceId1 to serviceId2. Try to failover to the target service even if it is not ready if the --forceactive option is used. This command can not be used if automatic failover is enabled. |
|
||||
| -getServiceState \<serviceId\> | Returns the state of the service. |
|
||||
| -getAllServiceState | Returns the state of all the services. |
|
||||
| -checkHealth \<serviceId\> | Requests that the service perform a health check. The RMAdmin tool will exit with a non-zero exit code if the check fails. |
|
||||
| -help [cmd] | Displays help for the given command or all commands if none is specified. |
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user