HADOOP-7961. Move HA fencing to common. Contributed by Eli Collins
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1228510 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9a07ba8945
commit
2d0b340c47
@ -264,6 +264,10 @@
|
|||||||
<artifactId>json-simple</artifactId>
|
<artifactId>json-simple</artifactId>
|
||||||
<scope>compile</scope>
|
<scope>compile</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.jcraft</groupId>
|
||||||
|
<artifactId>jsch</artifactId>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
|
@ -27,7 +27,6 @@
|
|||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.conf.Configured;
|
import org.apache.hadoop.conf.Configured;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import com.jcraft.jsch.ChannelExec;
|
import com.jcraft.jsch.ChannelExec;
|
||||||
@ -36,11 +35,11 @@
|
|||||||
import com.jcraft.jsch.Session;
|
import com.jcraft.jsch.Session;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This fencing implementation sshes to the target node and uses <code>fuser</code>
|
* This fencing implementation sshes to the target node and uses
|
||||||
* to kill the process listening on the NameNode's TCP port. This is
|
* <code>fuser</code> to kill the process listening on the service's
|
||||||
* more accurate than using "jps" since it doesn't require parsing,
|
* TCP port. This is more accurate than using "jps" since it doesn't
|
||||||
* and will work even if there are multiple NameNodes running on the
|
* require parsing, and will work even if there are multiple service
|
||||||
* same machine.<p>
|
* processes running on the same machine.<p>
|
||||||
* It returns a successful status code if:
|
* It returns a successful status code if:
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li><code>fuser</code> indicates it successfully killed a process, <em>or</em>
|
* <li><code>fuser</code> indicates it successfully killed a process, <em>or</em>
|
||||||
@ -49,7 +48,7 @@
|
|||||||
* <p>
|
* <p>
|
||||||
* This fencing mechanism is configured as follows in the fencing method
|
* This fencing mechanism is configured as follows in the fencing method
|
||||||
* list:
|
* list:
|
||||||
* <code>sshfence([username@]nnhost[:ssh-port][, target-nn-port])</code>
|
* <code>sshfence([username@]nnhost[:ssh-port], target-port)</code>
|
||||||
* where the first argument specifies the username, host, and port to ssh
|
* where the first argument specifies the username, host, and port to ssh
|
||||||
* into, and the second argument specifies the port on which the target
|
* into, and the second argument specifies the port on which the target
|
||||||
* NN process is listening.
|
* NN process is listening.
|
||||||
@ -58,9 +57,6 @@
|
|||||||
* <code>other-nn</code> as the current user on the standard SSH port,
|
* <code>other-nn</code> as the current user on the standard SSH port,
|
||||||
* then kill whatever process is listening on port 8020.
|
* then kill whatever process is listening on port 8020.
|
||||||
* <p>
|
* <p>
|
||||||
* If no <code>target-nn-port</code> is specified, it is assumed that the
|
|
||||||
* target NameNode is listening on the same port as the local NameNode.
|
|
||||||
* <p>
|
|
||||||
* In order to achieve passwordless SSH, the operator must also configure
|
* In order to achieve passwordless SSH, the operator must also configure
|
||||||
* <code>dfs.namenode.ha.fencing.ssh.private-key-files</code> to point to an
|
* <code>dfs.namenode.ha.fencing.ssh.private-key-files</code> to point to an
|
||||||
* SSH key that has passphrase-less access to the given username and host.
|
* SSH key that has passphrase-less access to the given username and host.
|
||||||
@ -117,10 +113,8 @@ public boolean tryFence(String argsStr)
|
|||||||
}
|
}
|
||||||
LOG.info("Connected to " + args.host);
|
LOG.info("Connected to " + args.host);
|
||||||
|
|
||||||
int targetPort = args.targetPort != null ?
|
|
||||||
args.targetPort : getDefaultNNPort();
|
|
||||||
try {
|
try {
|
||||||
return doFence(session, targetPort);
|
return doFence(session, args.targetPort);
|
||||||
} catch (JSchException e) {
|
} catch (JSchException e) {
|
||||||
LOG.warn("Unable to achieve fencing on remote host", e);
|
LOG.warn("Unable to achieve fencing on remote host", e);
|
||||||
return false;
|
return false;
|
||||||
@ -142,14 +136,14 @@ private Session createSession(Args args) throws JSchException {
|
|||||||
return session;
|
return session;
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean doFence(Session session, int nnPort) throws JSchException {
|
private boolean doFence(Session session, int port) throws JSchException {
|
||||||
try {
|
try {
|
||||||
LOG.info("Looking for process running on port " + nnPort);
|
LOG.info("Looking for process running on port " + port);
|
||||||
int rc = execCommand(session,
|
int rc = execCommand(session,
|
||||||
"PATH=$PATH:/sbin:/usr/sbin fuser -v -k -n tcp " + nnPort);
|
"PATH=$PATH:/sbin:/usr/sbin fuser -v -k -n tcp " + port);
|
||||||
if (rc == 0) {
|
if (rc == 0) {
|
||||||
LOG.info("Successfully killed process that was " +
|
LOG.info("Successfully killed process that was " +
|
||||||
"listening on port " + nnPort);
|
"listening on port " + port);
|
||||||
// exit code 0 indicates the process was successfully killed.
|
// exit code 0 indicates the process was successfully killed.
|
||||||
return true;
|
return true;
|
||||||
} else if (rc == 1) {
|
} else if (rc == 1) {
|
||||||
@ -157,7 +151,7 @@ private boolean doFence(Session session, int nnPort) throws JSchException {
|
|||||||
// or that fuser didn't have root privileges in order to find it
|
// or that fuser didn't have root privileges in order to find it
|
||||||
// (eg running as a different user)
|
// (eg running as a different user)
|
||||||
LOG.info(
|
LOG.info(
|
||||||
"Indeterminate response from trying to kill NameNode. " +
|
"Indeterminate response from trying to kill service. " +
|
||||||
"Verifying whether it is running using nc...");
|
"Verifying whether it is running using nc...");
|
||||||
rc = execCommand(session, "nc -z localhost 8020");
|
rc = execCommand(session, "nc -z localhost 8020");
|
||||||
if (rc == 0) {
|
if (rc == 0) {
|
||||||
@ -234,10 +228,6 @@ private Collection<String> getKeyFiles() {
|
|||||||
return getConf().getTrimmedStringCollection(CONF_IDENTITIES_KEY);
|
return getConf().getTrimmedStringCollection(CONF_IDENTITIES_KEY);
|
||||||
}
|
}
|
||||||
|
|
||||||
private int getDefaultNNPort() {
|
|
||||||
return NameNode.getAddress(getConf()).getPort();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Container for the parsed arg line for this fencing method.
|
* Container for the parsed arg line for this fencing method.
|
||||||
*/
|
*/
|
||||||
@ -251,8 +241,7 @@ static class Args {
|
|||||||
final String user;
|
final String user;
|
||||||
final String host;
|
final String host;
|
||||||
final int sshPort;
|
final int sshPort;
|
||||||
|
final int targetPort;
|
||||||
final Integer targetPort;
|
|
||||||
|
|
||||||
public Args(String args) throws BadFencingConfigurationException {
|
public Args(String args) throws BadFencingConfigurationException {
|
||||||
if (args == null) {
|
if (args == null) {
|
||||||
@ -260,7 +249,7 @@ public Args(String args) throws BadFencingConfigurationException {
|
|||||||
"Must specify args for ssh fencing configuration");
|
"Must specify args for ssh fencing configuration");
|
||||||
}
|
}
|
||||||
String[] argList = args.split(",\\s*");
|
String[] argList = args.split(",\\s*");
|
||||||
if (argList.length > 2 || argList.length == 0) {
|
if (argList.length != 2) {
|
||||||
throw new BadFencingConfigurationException(
|
throw new BadFencingConfigurationException(
|
||||||
"Incorrect number of arguments: " + args);
|
"Incorrect number of arguments: " + args);
|
||||||
}
|
}
|
||||||
@ -287,11 +276,7 @@ public Args(String args) throws BadFencingConfigurationException {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Parse target port.
|
// Parse target port.
|
||||||
if (argList.length > 1) {
|
targetPort = parseConfiggedPort(argList[1]);
|
||||||
targetPort = parseConfiggedPort(argList[1]);
|
|
||||||
} else {
|
|
||||||
targetPort = null;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private Integer parseConfiggedPort(String portStr)
|
private Integer parseConfiggedPort(String portStr)
|
@ -23,7 +23,6 @@
|
|||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.conf.Configured;
|
import org.apache.hadoop.conf.Configured;
|
||||||
import org.apache.hadoop.test.GenericTestUtils;
|
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
@ -20,7 +20,7 @@
|
|||||||
import static org.junit.Assert.*;
|
import static org.junit.Assert.*;
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.test.GenericTestUtils;
|
import org.apache.hadoop.util.StringUtils;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
@ -61,7 +61,6 @@ public void testBasicSuccessFailure() {
|
|||||||
assertFalse(fencer.tryFence("xxxxxxxxxxxx"));
|
assertFalse(fencer.tryFence("xxxxxxxxxxxx"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCheckArgs() {
|
public void testCheckArgs() {
|
||||||
try {
|
try {
|
||||||
@ -70,8 +69,9 @@ public void testCheckArgs() {
|
|||||||
new NodeFencer(conf);
|
new NodeFencer(conf);
|
||||||
fail("Didn't throw when passing no args to shell");
|
fail("Didn't throw when passing no args to shell");
|
||||||
} catch (BadFencingConfigurationException confe) {
|
} catch (BadFencingConfigurationException confe) {
|
||||||
GenericTestUtils.assertExceptionContains(
|
assertTrue(
|
||||||
"No argument passed", confe);
|
"Unexpected exception:" + StringUtils.stringifyException(confe),
|
||||||
|
confe.getMessage().contains("No argument passed"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -61,37 +61,37 @@ public void testConnectTimeout() throws BadFencingConfigurationException {
|
|||||||
SshFenceByTcpPort fence = new SshFenceByTcpPort();
|
SshFenceByTcpPort fence = new SshFenceByTcpPort();
|
||||||
fence.setConf(conf);
|
fence.setConf(conf);
|
||||||
// Connect to Google's DNS server - not running ssh!
|
// Connect to Google's DNS server - not running ssh!
|
||||||
assertFalse(fence.tryFence("8.8.8.8"));
|
assertFalse(fence.tryFence("8.8.8.8, 1234"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testArgsParsing() throws BadFencingConfigurationException {
|
public void testArgsParsing() throws BadFencingConfigurationException {
|
||||||
Args args = new SshFenceByTcpPort.Args("foo@bar.com:1234");
|
Args args = new SshFenceByTcpPort.Args("foo@bar.com:1234, 5678");
|
||||||
assertEquals("foo", args.user);
|
assertEquals("foo", args.user);
|
||||||
assertEquals("bar.com", args.host);
|
assertEquals("bar.com", args.host);
|
||||||
assertEquals(1234, args.sshPort);
|
assertEquals(1234, args.sshPort);
|
||||||
assertNull(args.targetPort);
|
assertEquals(5678, args.targetPort);
|
||||||
|
|
||||||
args = new SshFenceByTcpPort.Args("foo@bar.com");
|
args = new SshFenceByTcpPort.Args("foo@bar.com, 1234");
|
||||||
assertEquals("foo", args.user);
|
assertEquals("foo", args.user);
|
||||||
assertEquals("bar.com", args.host);
|
assertEquals("bar.com", args.host);
|
||||||
assertEquals(22, args.sshPort);
|
assertEquals(22, args.sshPort);
|
||||||
assertNull(args.targetPort);
|
assertEquals(1234, args.targetPort);
|
||||||
|
|
||||||
args = new SshFenceByTcpPort.Args("bar.com");
|
args = new SshFenceByTcpPort.Args("bar.com, 1234");
|
||||||
assertEquals(System.getProperty("user.name"), args.user);
|
assertEquals(System.getProperty("user.name"), args.user);
|
||||||
assertEquals("bar.com", args.host);
|
assertEquals("bar.com", args.host);
|
||||||
assertEquals(22, args.sshPort);
|
assertEquals(22, args.sshPort);
|
||||||
assertNull(args.targetPort);
|
assertEquals(1234, args.targetPort);
|
||||||
|
|
||||||
args = new SshFenceByTcpPort.Args("bar.com:1234, 12345");
|
args = new SshFenceByTcpPort.Args("bar.com:1234, 12345");
|
||||||
assertEquals(System.getProperty("user.name"), args.user);
|
assertEquals(System.getProperty("user.name"), args.user);
|
||||||
assertEquals("bar.com", args.host);
|
assertEquals("bar.com", args.host);
|
||||||
assertEquals(1234, args.sshPort);
|
assertEquals(1234, args.sshPort);
|
||||||
assertEquals(Integer.valueOf(12345), args.targetPort);
|
assertEquals(12345, args.targetPort);
|
||||||
|
|
||||||
args = new SshFenceByTcpPort.Args("bar, 8020");
|
args = new SshFenceByTcpPort.Args("bar, 8020");
|
||||||
assertEquals(Integer.valueOf(8020), args.targetPort);
|
assertEquals(8020, args.targetPort);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -101,6 +101,8 @@ public void testBadArgsParsing() throws BadFencingConfigurationException {
|
|||||||
assertBadArgs("bar.com:");
|
assertBadArgs("bar.com:");
|
||||||
assertBadArgs("bar.com:x");
|
assertBadArgs("bar.com:x");
|
||||||
assertBadArgs("foo.com, x");
|
assertBadArgs("foo.com, x");
|
||||||
|
assertBadArgs("foo.com,");
|
||||||
|
assertBadArgs("foo.com, ");
|
||||||
}
|
}
|
||||||
|
|
||||||
private void assertBadArgs(String argStr) {
|
private void assertBadArgs(String argStr) {
|
@ -109,10 +109,6 @@
|
|||||||
<artifactId>ant</artifactId>
|
<artifactId>ant</artifactId>
|
||||||
<scope>provided</scope>
|
<scope>provided</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>com.jcraft</groupId>
|
|
||||||
<artifactId>jsch</artifactId>
|
|
||||||
</dependency>
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
|
Loading…
Reference in New Issue
Block a user