HADOOP-8721. ZKFC should not retry 45 times when attempting a graceful fence during a failover. Contributed by Vinayakumar B.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1376194 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Aaron Myers 2012-08-22 18:54:02 +00:00
parent de8b34a70e
commit d66223fd99
3 changed files with 27 additions and 1 deletions

View File

@ -420,6 +420,9 @@ Branch-2 ( Unreleased changes )
HADOOP-8720. TestLocalFileSystem should use test root subdirectory. HADOOP-8720. TestLocalFileSystem should use test root subdirectory.
(Vlad Rozov via eli) (Vlad Rozov via eli)
HADOOP-8721. ZKFC should not retry 45 times when attempting a graceful
fence during a failover. (Vinayakumar B via atm)
BREAKDOWN OF HDFS-3042 SUBTASKS BREAKDOWN OF HDFS-3042 SUBTASKS
HADOOP-8220. ZKFailoverController doesn't handle failure to become active HADOOP-8220. ZKFailoverController doesn't handle failure to become active

View File

@ -154,6 +154,11 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
"ha.failover-controller.graceful-fence.rpc-timeout.ms"; "ha.failover-controller.graceful-fence.rpc-timeout.ms";
public static final int HA_FC_GRACEFUL_FENCE_TIMEOUT_DEFAULT = 5000; public static final int HA_FC_GRACEFUL_FENCE_TIMEOUT_DEFAULT = 5000;
/* Number of IPC client connection retries the FC makes when attempting a graceful fence */
public static final String HA_FC_GRACEFUL_FENCE_CONNECTION_RETRIES =
"ha.failover-controller.graceful-fence.connection.retries";
public static final int HA_FC_GRACEFUL_FENCE_CONNECTION_RETRIES_DEFAULT = 1;
/* Timeout that the CLI (manual) FC waits for monitorHealth, getServiceState */ /* Timeout that the CLI (manual) FC waits for monitorHealth, getServiceState */
public static final String HA_FC_CLI_CHECK_TIMEOUT_KEY = public static final String HA_FC_CLI_CHECK_TIMEOUT_KEY =
"ha.failover-controller.cli-check.rpc-timeout.ms"; "ha.failover-controller.cli-check.rpc-timeout.ms";

View File

@ -49,16 +49,34 @@ public class FailoverController {
private final int rpcTimeoutToNewActive; private final int rpcTimeoutToNewActive;
private final Configuration conf; private final Configuration conf;
/*
* A separate copy of conf is needed for graceful fencing so that
* the number of IPC client connection retries can be configured
* for it independently of the original conf.
* See HDFS-3561.
*/
private final Configuration gracefulFenceConf;
private final RequestSource requestSource; private final RequestSource requestSource;
/**
 * Creates a failover controller from the given configuration and request source.
 *
 * Besides storing its arguments and the two timeouts derived from conf, this
 * constructor prepares gracefulFenceConf: a copy of conf in which both IPC
 * client connect-retry settings are overridden with the graceful-fence retry
 * count read from HA_FC_GRACEFUL_FENCE_CONNECTION_RETRIES.
 *
 * @param conf configuration the timeouts and the graceful-fence retry count are read from
 * @param source the request source stored in this controller
 */
public FailoverController(Configuration conf, public FailoverController(Configuration conf,
RequestSource source) { RequestSource source) {
this.conf = conf; this.conf = conf;
// Work on a copy so the retry overrides below are not written into conf itself.
this.gracefulFenceConf = new Configuration(conf);
this.requestSource = source; this.requestSource = source;
this.gracefulFenceTimeout = getGracefulFenceTimeout(conf); this.gracefulFenceTimeout = getGracefulFenceTimeout(conf);
this.rpcTimeoutToNewActive = getRpcTimeoutToNewActive(conf); this.rpcTimeoutToNewActive = getRpcTimeoutToNewActive(conf);
// Configure fewer connection retries for graceful fence
// (falls back to HA_FC_GRACEFUL_FENCE_CONNECTION_RETRIES_DEFAULT when the key is unset).
int gracefulFenceConnectRetries = conf.getInt(
CommonConfigurationKeys.HA_FC_GRACEFUL_FENCE_CONNECTION_RETRIES,
CommonConfigurationKeys.HA_FC_GRACEFUL_FENCE_CONNECTION_RETRIES_DEFAULT);
// Apply the same limit to the general connect retries...
gracefulFenceConf.setInt(
CommonConfigurationKeys.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY,
gracefulFenceConnectRetries);
// ...and to the retries performed on socket timeouts.
gracefulFenceConf.setInt(
CommonConfigurationKeys.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY,
gracefulFenceConnectRetries);
} }
static int getGracefulFenceTimeout(Configuration conf) { static int getGracefulFenceTimeout(Configuration conf) {
@ -150,7 +168,7 @@ private StateChangeRequestInfo createReqInfo() {
boolean tryGracefulFence(HAServiceTarget svc) { boolean tryGracefulFence(HAServiceTarget svc) {
HAServiceProtocol proxy = null; HAServiceProtocol proxy = null;
try { try {
proxy = svc.getProxy(conf, gracefulFenceTimeout); proxy = svc.getProxy(gracefulFenceConf, gracefulFenceTimeout);
proxy.transitionToStandby(createReqInfo()); proxy.transitionToStandby(createReqInfo());
return true; return true;
} catch (ServiceFailedException sfe) { } catch (ServiceFailedException sfe) {