HADOOP-7961. Move HA fencing to common. Contributed by Eli Collins

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1228510 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Eli Collins 2012-01-07 00:17:42 +00:00
parent 9a07ba8945
commit 2d0b340c47
11 changed files with 34 additions and 48 deletions

View File

@ -264,6 +264,10 @@
<artifactId>json-simple</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.jcraft</groupId>
<artifactId>jsch</artifactId>
</dependency>
</dependencies>
<build>

View File

@ -27,7 +27,6 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import com.google.common.annotations.VisibleForTesting;
import com.jcraft.jsch.ChannelExec;
@ -36,11 +35,11 @@
import com.jcraft.jsch.Session;
/**
* This fencing implementation sshes to the target node and uses <code>fuser</code>
* to kill the process listening on the NameNode's TCP port. This is
* more accurate than using "jps" since it doesn't require parsing,
* and will work even if there are multiple NameNodes running on the
* same machine.<p>
* This fencing implementation sshes to the target node and uses
* <code>fuser</code> to kill the process listening on the service's
* TCP port. This is more accurate than using "jps" since it doesn't
* require parsing, and will work even if there are multiple service
* processes running on the same machine.<p>
* It returns a successful status code if:
* <ul>
* <li><code>fuser</code> indicates it successfully killed a process, <em>or</em>
@ -49,7 +48,7 @@
* <p>
* This fencing mechanism is configured as follows in the fencing method
* list:
* <code>sshfence([username@]nnhost[:ssh-port][, target-nn-port])</code>
* <code>sshfence([username@]nnhost[:ssh-port], target-port)</code>
* where the first argument specifies the username, host, and port to ssh
* into, and the second argument specifies the port on which the target
* NN process is listening.
@ -58,9 +57,6 @@
* <code>other-nn</code> as the current user on the standard SSH port,
* then kill whatever process is listening on port 8020.
* <p>
* If no <code>target-nn-port</code> is specified, it is assumed that the
* target NameNode is listening on the same port as the local NameNode.
* <p>
* In order to achieve passwordless SSH, the operator must also configure
* <code>dfs.namenode.ha.fencing.ssh.private-key-files</code> to point to an
* SSH key that has passphrase-less access to the given username and host.
@ -117,10 +113,8 @@ public boolean tryFence(String argsStr)
}
LOG.info("Connected to " + args.host);
int targetPort = args.targetPort != null ?
args.targetPort : getDefaultNNPort();
try {
return doFence(session, targetPort);
return doFence(session, args.targetPort);
} catch (JSchException e) {
LOG.warn("Unable to achieve fencing on remote host", e);
return false;
@ -142,14 +136,14 @@ private Session createSession(Args args) throws JSchException {
return session;
}
private boolean doFence(Session session, int nnPort) throws JSchException {
private boolean doFence(Session session, int port) throws JSchException {
try {
LOG.info("Looking for process running on port " + nnPort);
LOG.info("Looking for process running on port " + port);
int rc = execCommand(session,
"PATH=$PATH:/sbin:/usr/sbin fuser -v -k -n tcp " + nnPort);
"PATH=$PATH:/sbin:/usr/sbin fuser -v -k -n tcp " + port);
if (rc == 0) {
LOG.info("Successfully killed process that was " +
"listening on port " + nnPort);
"listening on port " + port);
// exit code 0 indicates the process was successfully killed.
return true;
} else if (rc == 1) {
@ -157,7 +151,7 @@ private boolean doFence(Session session, int nnPort) throws JSchException {
// or that fuser didn't have root privileges in order to find it
// (eg running as a different user)
LOG.info(
"Indeterminate response from trying to kill NameNode. " +
"Indeterminate response from trying to kill service. " +
"Verifying whether it is running using nc...");
rc = execCommand(session, "nc -z localhost 8020");
if (rc == 0) {
@ -234,10 +228,6 @@ private Collection<String> getKeyFiles() {
return getConf().getTrimmedStringCollection(CONF_IDENTITIES_KEY);
}
private int getDefaultNNPort() {
return NameNode.getAddress(getConf()).getPort();
}
/**
* Container for the parsed arg line for this fencing method.
*/
@ -251,8 +241,7 @@ static class Args {
final String user;
final String host;
final int sshPort;
final Integer targetPort;
final int targetPort;
public Args(String args) throws BadFencingConfigurationException {
if (args == null) {
@ -260,7 +249,7 @@ public Args(String args) throws BadFencingConfigurationException {
"Must specify args for ssh fencing configuration");
}
String[] argList = args.split(",\\s*");
if (argList.length > 2 || argList.length == 0) {
if (argList.length != 2) {
throw new BadFencingConfigurationException(
"Incorrect number of arguments: " + args);
}
@ -287,11 +276,7 @@ public Args(String args) throws BadFencingConfigurationException {
}
// Parse target port.
if (argList.length > 1) {
targetPort = parseConfiggedPort(argList[1]);
} else {
targetPort = null;
}
}
private Integer parseConfiggedPort(String portStr)

View File

@ -23,7 +23,6 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.Before;
import org.junit.Test;

View File

@ -20,7 +20,7 @@
import static org.junit.Assert.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.util.StringUtils;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
@ -61,7 +61,6 @@ public void testBasicSuccessFailure() {
assertFalse(fencer.tryFence("xxxxxxxxxxxx"));
}
@Test
public void testCheckArgs() {
try {
@ -70,8 +69,9 @@ public void testCheckArgs() {
new NodeFencer(conf);
fail("Didn't throw when passing no args to shell");
} catch (BadFencingConfigurationException confe) {
GenericTestUtils.assertExceptionContains(
"No argument passed", confe);
assertTrue(
"Unexpected exception:" + StringUtils.stringifyException(confe),
confe.getMessage().contains("No argument passed"));
}
}

View File

@ -61,37 +61,37 @@ public void testConnectTimeout() throws BadFencingConfigurationException {
SshFenceByTcpPort fence = new SshFenceByTcpPort();
fence.setConf(conf);
// Connect to Google's DNS server - not running ssh!
assertFalse(fence.tryFence("8.8.8.8"));
assertFalse(fence.tryFence("8.8.8.8, 1234"));
}
@Test
public void testArgsParsing() throws BadFencingConfigurationException {
Args args = new SshFenceByTcpPort.Args("foo@bar.com:1234");
Args args = new SshFenceByTcpPort.Args("foo@bar.com:1234, 5678");
assertEquals("foo", args.user);
assertEquals("bar.com", args.host);
assertEquals(1234, args.sshPort);
assertNull(args.targetPort);
assertEquals(5678, args.targetPort);
args = new SshFenceByTcpPort.Args("foo@bar.com");
args = new SshFenceByTcpPort.Args("foo@bar.com, 1234");
assertEquals("foo", args.user);
assertEquals("bar.com", args.host);
assertEquals(22, args.sshPort);
assertNull(args.targetPort);
assertEquals(1234, args.targetPort);
args = new SshFenceByTcpPort.Args("bar.com");
args = new SshFenceByTcpPort.Args("bar.com, 1234");
assertEquals(System.getProperty("user.name"), args.user);
assertEquals("bar.com", args.host);
assertEquals(22, args.sshPort);
assertNull(args.targetPort);
assertEquals(1234, args.targetPort);
args = new SshFenceByTcpPort.Args("bar.com:1234, 12345");
assertEquals(System.getProperty("user.name"), args.user);
assertEquals("bar.com", args.host);
assertEquals(1234, args.sshPort);
assertEquals(Integer.valueOf(12345), args.targetPort);
assertEquals(12345, args.targetPort);
args = new SshFenceByTcpPort.Args("bar, 8020");
assertEquals(Integer.valueOf(8020), args.targetPort);
assertEquals(8020, args.targetPort);
}
@Test
@ -101,6 +101,8 @@ public void testBadArgsParsing() throws BadFencingConfigurationException {
assertBadArgs("bar.com:");
assertBadArgs("bar.com:x");
assertBadArgs("foo.com, x");
assertBadArgs("foo.com,");
assertBadArgs("foo.com, ");
}
private void assertBadArgs(String argStr) {

View File

@ -109,10 +109,6 @@
<artifactId>ant</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.jcraft</groupId>
<artifactId>jsch</artifactId>
</dependency>
</dependencies>
<build>