HADOOP-7961. Move HA fencing to common. Contributed by Eli Collins

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1228510 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Eli Collins 2012-01-07 00:17:42 +00:00
parent 9a07ba8945
commit 2d0b340c47
11 changed files with 34 additions and 48 deletions

View File

@ -264,6 +264,10 @@
<artifactId>json-simple</artifactId> <artifactId>json-simple</artifactId>
<scope>compile</scope> <scope>compile</scope>
</dependency> </dependency>
<dependency>
<groupId>com.jcraft</groupId>
<artifactId>jsch</artifactId>
</dependency>
</dependencies> </dependencies>
<build> <build>

View File

@ -27,7 +27,6 @@
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configured; import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import com.google.common.annotations.VisibleForTesting; import com.google.common.annotations.VisibleForTesting;
import com.jcraft.jsch.ChannelExec; import com.jcraft.jsch.ChannelExec;
@ -36,11 +35,11 @@
import com.jcraft.jsch.Session; import com.jcraft.jsch.Session;
/** /**
* This fencing implementation sshes to the target node and uses <code>fuser</code> * This fencing implementation sshes to the target node and uses
* to kill the process listening on the NameNode's TCP port. This is * <code>fuser</code> to kill the process listening on the service's
* more accurate than using "jps" since it doesn't require parsing, * TCP port. This is more accurate than using "jps" since it doesn't
* and will work even if there are multiple NameNodes running on the * require parsing, and will work even if there are multiple service
* same machine.<p> * processes running on the same machine.<p>
* It returns a successful status code if: * It returns a successful status code if:
* <ul> * <ul>
* <li><code>fuser</code> indicates it successfully killed a process, <em>or</em> * <li><code>fuser</code> indicates it successfully killed a process, <em>or</em>
@ -49,7 +48,7 @@
* <p> * <p>
* This fencing mechanism is configured as follows in the fencing method * This fencing mechanism is configured as follows in the fencing method
* list: * list:
* <code>sshfence([username@]nnhost[:ssh-port][, target-nn-port])</code> * <code>sshfence([username@]nnhost[:ssh-port], target-port)</code>
* where the first argument specifies the username, host, and port to ssh * where the first argument specifies the username, host, and port to ssh
* into, and the second argument specifies the port on which the target * into, and the second argument specifies the port on which the target
* NN process is listening. * NN process is listening.
@ -58,9 +57,6 @@
* <code>other-nn</code> as the current user on the standard SSH port, * <code>other-nn</code> as the current user on the standard SSH port,
* then kill whatever process is listening on port 8020. * then kill whatever process is listening on port 8020.
* <p> * <p>
* If no <code>target-nn-port</code> is specified, it is assumed that the
* target NameNode is listening on the same port as the local NameNode.
* <p>
* In order to achieve passwordless SSH, the operator must also configure * In order to achieve passwordless SSH, the operator must also configure
* <code>dfs.namenode.ha.fencing.ssh.private-key-files</code> to point to an * <code>dfs.namenode.ha.fencing.ssh.private-key-files</code> to point to an
* SSH key that has passphrase-less access to the given username and host. * SSH key that has passphrase-less access to the given username and host.
@ -117,10 +113,8 @@ public boolean tryFence(String argsStr)
} }
LOG.info("Connected to " + args.host); LOG.info("Connected to " + args.host);
int targetPort = args.targetPort != null ?
args.targetPort : getDefaultNNPort();
try { try {
return doFence(session, targetPort); return doFence(session, args.targetPort);
} catch (JSchException e) { } catch (JSchException e) {
LOG.warn("Unable to achieve fencing on remote host", e); LOG.warn("Unable to achieve fencing on remote host", e);
return false; return false;
@ -142,14 +136,14 @@ private Session createSession(Args args) throws JSchException {
return session; return session;
} }
private boolean doFence(Session session, int nnPort) throws JSchException { private boolean doFence(Session session, int port) throws JSchException {
try { try {
LOG.info("Looking for process running on port " + nnPort); LOG.info("Looking for process running on port " + port);
int rc = execCommand(session, int rc = execCommand(session,
"PATH=$PATH:/sbin:/usr/sbin fuser -v -k -n tcp " + nnPort); "PATH=$PATH:/sbin:/usr/sbin fuser -v -k -n tcp " + port);
if (rc == 0) { if (rc == 0) {
LOG.info("Successfully killed process that was " + LOG.info("Successfully killed process that was " +
"listening on port " + nnPort); "listening on port " + port);
// exit code 0 indicates the process was successfully killed. // exit code 0 indicates the process was successfully killed.
return true; return true;
} else if (rc == 1) { } else if (rc == 1) {
@ -157,7 +151,7 @@ private boolean doFence(Session session, int nnPort) throws JSchException {
// or that fuser didn't have root privileges in order to find it // or that fuser didn't have root privileges in order to find it
// (eg running as a different user) // (eg running as a different user)
LOG.info( LOG.info(
"Indeterminate response from trying to kill NameNode. " + "Indeterminate response from trying to kill service. " +
"Verifying whether it is running using nc..."); "Verifying whether it is running using nc...");
rc = execCommand(session, "nc -z localhost 8020"); rc = execCommand(session, "nc -z localhost 8020");
if (rc == 0) { if (rc == 0) {
@ -234,10 +228,6 @@ private Collection<String> getKeyFiles() {
return getConf().getTrimmedStringCollection(CONF_IDENTITIES_KEY); return getConf().getTrimmedStringCollection(CONF_IDENTITIES_KEY);
} }
private int getDefaultNNPort() {
return NameNode.getAddress(getConf()).getPort();
}
/** /**
* Container for the parsed arg line for this fencing method. * Container for the parsed arg line for this fencing method.
*/ */
@ -251,8 +241,7 @@ static class Args {
final String user; final String user;
final String host; final String host;
final int sshPort; final int sshPort;
final int targetPort;
final Integer targetPort;
public Args(String args) throws BadFencingConfigurationException { public Args(String args) throws BadFencingConfigurationException {
if (args == null) { if (args == null) {
@ -260,7 +249,7 @@ public Args(String args) throws BadFencingConfigurationException {
"Must specify args for ssh fencing configuration"); "Must specify args for ssh fencing configuration");
} }
String[] argList = args.split(",\\s*"); String[] argList = args.split(",\\s*");
if (argList.length > 2 || argList.length == 0) { if (argList.length != 2) {
throw new BadFencingConfigurationException( throw new BadFencingConfigurationException(
"Incorrect number of arguments: " + args); "Incorrect number of arguments: " + args);
} }
@ -287,11 +276,7 @@ public Args(String args) throws BadFencingConfigurationException {
} }
// Parse target port. // Parse target port.
if (argList.length > 1) { targetPort = parseConfiggedPort(argList[1]);
targetPort = parseConfiggedPort(argList[1]);
} else {
targetPort = null;
}
} }
private Integer parseConfiggedPort(String portStr) private Integer parseConfiggedPort(String portStr)

View File

@ -23,7 +23,6 @@
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured; import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;

View File

@ -20,7 +20,7 @@
import static org.junit.Assert.*; import static org.junit.Assert.*;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.StringUtils;
import org.junit.Before; import org.junit.Before;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
@ -61,7 +61,6 @@ public void testBasicSuccessFailure() {
assertFalse(fencer.tryFence("xxxxxxxxxxxx")); assertFalse(fencer.tryFence("xxxxxxxxxxxx"));
} }
@Test @Test
public void testCheckArgs() { public void testCheckArgs() {
try { try {
@ -70,8 +69,9 @@ public void testCheckArgs() {
new NodeFencer(conf); new NodeFencer(conf);
fail("Didn't throw when passing no args to shell"); fail("Didn't throw when passing no args to shell");
} catch (BadFencingConfigurationException confe) { } catch (BadFencingConfigurationException confe) {
GenericTestUtils.assertExceptionContains( assertTrue(
"No argument passed", confe); "Unexpected exception:" + StringUtils.stringifyException(confe),
confe.getMessage().contains("No argument passed"));
} }
} }

View File

@ -61,37 +61,37 @@ public void testConnectTimeout() throws BadFencingConfigurationException {
SshFenceByTcpPort fence = new SshFenceByTcpPort(); SshFenceByTcpPort fence = new SshFenceByTcpPort();
fence.setConf(conf); fence.setConf(conf);
// Connect to Google's DNS server - not running ssh! // Connect to Google's DNS server - not running ssh!
assertFalse(fence.tryFence("8.8.8.8")); assertFalse(fence.tryFence("8.8.8.8, 1234"));
} }
@Test @Test
public void testArgsParsing() throws BadFencingConfigurationException { public void testArgsParsing() throws BadFencingConfigurationException {
Args args = new SshFenceByTcpPort.Args("foo@bar.com:1234"); Args args = new SshFenceByTcpPort.Args("foo@bar.com:1234, 5678");
assertEquals("foo", args.user); assertEquals("foo", args.user);
assertEquals("bar.com", args.host); assertEquals("bar.com", args.host);
assertEquals(1234, args.sshPort); assertEquals(1234, args.sshPort);
assertNull(args.targetPort); assertEquals(5678, args.targetPort);
args = new SshFenceByTcpPort.Args("foo@bar.com"); args = new SshFenceByTcpPort.Args("foo@bar.com, 1234");
assertEquals("foo", args.user); assertEquals("foo", args.user);
assertEquals("bar.com", args.host); assertEquals("bar.com", args.host);
assertEquals(22, args.sshPort); assertEquals(22, args.sshPort);
assertNull(args.targetPort); assertEquals(1234, args.targetPort);
args = new SshFenceByTcpPort.Args("bar.com"); args = new SshFenceByTcpPort.Args("bar.com, 1234");
assertEquals(System.getProperty("user.name"), args.user); assertEquals(System.getProperty("user.name"), args.user);
assertEquals("bar.com", args.host); assertEquals("bar.com", args.host);
assertEquals(22, args.sshPort); assertEquals(22, args.sshPort);
assertNull(args.targetPort); assertEquals(1234, args.targetPort);
args = new SshFenceByTcpPort.Args("bar.com:1234, 12345"); args = new SshFenceByTcpPort.Args("bar.com:1234, 12345");
assertEquals(System.getProperty("user.name"), args.user); assertEquals(System.getProperty("user.name"), args.user);
assertEquals("bar.com", args.host); assertEquals("bar.com", args.host);
assertEquals(1234, args.sshPort); assertEquals(1234, args.sshPort);
assertEquals(Integer.valueOf(12345), args.targetPort); assertEquals(12345, args.targetPort);
args = new SshFenceByTcpPort.Args("bar, 8020"); args = new SshFenceByTcpPort.Args("bar, 8020");
assertEquals(Integer.valueOf(8020), args.targetPort); assertEquals(8020, args.targetPort);
} }
@Test @Test
@ -101,6 +101,8 @@ public void testBadArgsParsing() throws BadFencingConfigurationException {
assertBadArgs("bar.com:"); assertBadArgs("bar.com:");
assertBadArgs("bar.com:x"); assertBadArgs("bar.com:x");
assertBadArgs("foo.com, x"); assertBadArgs("foo.com, x");
assertBadArgs("foo.com,");
assertBadArgs("foo.com, ");
} }
private void assertBadArgs(String argStr) { private void assertBadArgs(String argStr) {

View File

@ -109,10 +109,6 @@
<artifactId>ant</artifactId> <artifactId>ant</artifactId>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency>
<groupId>com.jcraft</groupId>
<artifactId>jsch</artifactId>
</dependency>
</dependencies> </dependencies>
<build> <build>