HADOOP-7961. Move HA fencing to common. Contributed by Eli Collins
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1228510 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9a07ba8945
commit
2d0b340c47
@ -264,6 +264,10 @@
|
||||
<artifactId>json-simple</artifactId>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.jcraft</groupId>
|
||||
<artifactId>jsch</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
@ -27,7 +27,6 @@
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configured;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.jcraft.jsch.ChannelExec;
|
||||
@ -36,11 +35,11 @@
|
||||
import com.jcraft.jsch.Session;
|
||||
|
||||
/**
|
||||
* This fencing implementation sshes to the target node and uses <code>fuser</code>
|
||||
* to kill the process listening on the NameNode's TCP port. This is
|
||||
* more accurate than using "jps" since it doesn't require parsing,
|
||||
* and will work even if there are multiple NameNodes running on the
|
||||
* same machine.<p>
|
||||
* This fencing implementation sshes to the target node and uses
|
||||
* <code>fuser</code> to kill the process listening on the service's
|
||||
* TCP port. This is more accurate than using "jps" since it doesn't
|
||||
* require parsing, and will work even if there are multiple service
|
||||
* processes running on the same machine.<p>
|
||||
* It returns a successful status code if:
|
||||
* <ul>
|
||||
* <li><code>fuser</code> indicates it successfully killed a process, <em>or</em>
|
||||
@ -49,7 +48,7 @@
|
||||
* <p>
|
||||
* This fencing mechanism is configured as follows in the fencing method
|
||||
* list:
|
||||
* <code>sshfence([username@]nnhost[:ssh-port][, target-nn-port])</code>
|
||||
* <code>sshfence([username@]nnhost[:ssh-port], target-port)</code>
|
||||
* where the first argument specifies the username, host, and port to ssh
|
||||
* into, and the second argument specifies the port on which the target
|
||||
* NN process is listening.
|
||||
@ -58,9 +57,6 @@
|
||||
* <code>other-nn</code> as the current user on the standard SSH port,
|
||||
* then kill whatever process is listening on port 8020.
|
||||
* <p>
|
||||
* If no <code>target-nn-port</code> is specified, it is assumed that the
|
||||
* target NameNode is listening on the same port as the local NameNode.
|
||||
* <p>
|
||||
* In order to achieve passwordless SSH, the operator must also configure
|
||||
* <code>dfs.namenode.ha.fencing.ssh.private-key-files</code> to point to an
|
||||
* SSH key that has passphrase-less access to the given username and host.
|
||||
@ -117,10 +113,8 @@ public boolean tryFence(String argsStr)
|
||||
}
|
||||
LOG.info("Connected to " + args.host);
|
||||
|
||||
int targetPort = args.targetPort != null ?
|
||||
args.targetPort : getDefaultNNPort();
|
||||
try {
|
||||
return doFence(session, targetPort);
|
||||
return doFence(session, args.targetPort);
|
||||
} catch (JSchException e) {
|
||||
LOG.warn("Unable to achieve fencing on remote host", e);
|
||||
return false;
|
||||
@ -142,14 +136,14 @@ private Session createSession(Args args) throws JSchException {
|
||||
return session;
|
||||
}
|
||||
|
||||
private boolean doFence(Session session, int nnPort) throws JSchException {
|
||||
private boolean doFence(Session session, int port) throws JSchException {
|
||||
try {
|
||||
LOG.info("Looking for process running on port " + nnPort);
|
||||
LOG.info("Looking for process running on port " + port);
|
||||
int rc = execCommand(session,
|
||||
"PATH=$PATH:/sbin:/usr/sbin fuser -v -k -n tcp " + nnPort);
|
||||
"PATH=$PATH:/sbin:/usr/sbin fuser -v -k -n tcp " + port);
|
||||
if (rc == 0) {
|
||||
LOG.info("Successfully killed process that was " +
|
||||
"listening on port " + nnPort);
|
||||
"listening on port " + port);
|
||||
// exit code 0 indicates the process was successfully killed.
|
||||
return true;
|
||||
} else if (rc == 1) {
|
||||
@ -157,7 +151,7 @@ private boolean doFence(Session session, int nnPort) throws JSchException {
|
||||
// or that fuser didn't have root privileges in order to find it
|
||||
// (eg running as a different user)
|
||||
LOG.info(
|
||||
"Indeterminate response from trying to kill NameNode. " +
|
||||
"Indeterminate response from trying to kill service. " +
|
||||
"Verifying whether it is running using nc...");
|
||||
rc = execCommand(session, "nc -z localhost 8020");
|
||||
if (rc == 0) {
|
||||
@ -234,10 +228,6 @@ private Collection<String> getKeyFiles() {
|
||||
return getConf().getTrimmedStringCollection(CONF_IDENTITIES_KEY);
|
||||
}
|
||||
|
||||
private int getDefaultNNPort() {
|
||||
return NameNode.getAddress(getConf()).getPort();
|
||||
}
|
||||
|
||||
/**
|
||||
* Container for the parsed arg line for this fencing method.
|
||||
*/
|
||||
@ -251,8 +241,7 @@ static class Args {
|
||||
final String user;
|
||||
final String host;
|
||||
final int sshPort;
|
||||
|
||||
final Integer targetPort;
|
||||
final int targetPort;
|
||||
|
||||
public Args(String args) throws BadFencingConfigurationException {
|
||||
if (args == null) {
|
||||
@ -260,7 +249,7 @@ public Args(String args) throws BadFencingConfigurationException {
|
||||
"Must specify args for ssh fencing configuration");
|
||||
}
|
||||
String[] argList = args.split(",\\s*");
|
||||
if (argList.length > 2 || argList.length == 0) {
|
||||
if (argList.length != 2) {
|
||||
throw new BadFencingConfigurationException(
|
||||
"Incorrect number of arguments: " + args);
|
||||
}
|
||||
@ -287,11 +276,7 @@ public Args(String args) throws BadFencingConfigurationException {
|
||||
}
|
||||
|
||||
// Parse target port.
|
||||
if (argList.length > 1) {
|
||||
targetPort = parseConfiggedPort(argList[1]);
|
||||
} else {
|
||||
targetPort = null;
|
||||
}
|
||||
}
|
||||
|
||||
private Integer parseConfiggedPort(String portStr)
|
@ -23,7 +23,6 @@
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.conf.Configured;
|
||||
import org.apache.hadoop.test.GenericTestUtils;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
@ -20,7 +20,7 @@
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.test.GenericTestUtils;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
@ -61,7 +61,6 @@ public void testBasicSuccessFailure() {
|
||||
assertFalse(fencer.tryFence("xxxxxxxxxxxx"));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testCheckArgs() {
|
||||
try {
|
||||
@ -70,8 +69,9 @@ public void testCheckArgs() {
|
||||
new NodeFencer(conf);
|
||||
fail("Didn't throw when passing no args to shell");
|
||||
} catch (BadFencingConfigurationException confe) {
|
||||
GenericTestUtils.assertExceptionContains(
|
||||
"No argument passed", confe);
|
||||
assertTrue(
|
||||
"Unexpected exception:" + StringUtils.stringifyException(confe),
|
||||
confe.getMessage().contains("No argument passed"));
|
||||
}
|
||||
}
|
||||
|
@ -61,37 +61,37 @@ public void testConnectTimeout() throws BadFencingConfigurationException {
|
||||
SshFenceByTcpPort fence = new SshFenceByTcpPort();
|
||||
fence.setConf(conf);
|
||||
// Connect to Google's DNS server - not running ssh!
|
||||
assertFalse(fence.tryFence("8.8.8.8"));
|
||||
assertFalse(fence.tryFence("8.8.8.8, 1234"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testArgsParsing() throws BadFencingConfigurationException {
|
||||
Args args = new SshFenceByTcpPort.Args("foo@bar.com:1234");
|
||||
Args args = new SshFenceByTcpPort.Args("foo@bar.com:1234, 5678");
|
||||
assertEquals("foo", args.user);
|
||||
assertEquals("bar.com", args.host);
|
||||
assertEquals(1234, args.sshPort);
|
||||
assertNull(args.targetPort);
|
||||
assertEquals(5678, args.targetPort);
|
||||
|
||||
args = new SshFenceByTcpPort.Args("foo@bar.com");
|
||||
args = new SshFenceByTcpPort.Args("foo@bar.com, 1234");
|
||||
assertEquals("foo", args.user);
|
||||
assertEquals("bar.com", args.host);
|
||||
assertEquals(22, args.sshPort);
|
||||
assertNull(args.targetPort);
|
||||
assertEquals(1234, args.targetPort);
|
||||
|
||||
args = new SshFenceByTcpPort.Args("bar.com");
|
||||
args = new SshFenceByTcpPort.Args("bar.com, 1234");
|
||||
assertEquals(System.getProperty("user.name"), args.user);
|
||||
assertEquals("bar.com", args.host);
|
||||
assertEquals(22, args.sshPort);
|
||||
assertNull(args.targetPort);
|
||||
assertEquals(1234, args.targetPort);
|
||||
|
||||
args = new SshFenceByTcpPort.Args("bar.com:1234, 12345");
|
||||
assertEquals(System.getProperty("user.name"), args.user);
|
||||
assertEquals("bar.com", args.host);
|
||||
assertEquals(1234, args.sshPort);
|
||||
assertEquals(Integer.valueOf(12345), args.targetPort);
|
||||
assertEquals(12345, args.targetPort);
|
||||
|
||||
args = new SshFenceByTcpPort.Args("bar, 8020");
|
||||
assertEquals(Integer.valueOf(8020), args.targetPort);
|
||||
assertEquals(8020, args.targetPort);
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -101,6 +101,8 @@ public void testBadArgsParsing() throws BadFencingConfigurationException {
|
||||
assertBadArgs("bar.com:");
|
||||
assertBadArgs("bar.com:x");
|
||||
assertBadArgs("foo.com, x");
|
||||
assertBadArgs("foo.com,");
|
||||
assertBadArgs("foo.com, ");
|
||||
}
|
||||
|
||||
private void assertBadArgs(String argStr) {
|
@ -109,10 +109,6 @@
|
||||
<artifactId>ant</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.jcraft</groupId>
|
||||
<artifactId>jsch</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
Loading…
Reference in New Issue
Block a user