HADOOP-8721. ZKFC should not retry 45 times when attempting a graceful fence during a failover. Contributed by Vinayakumar B.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1376194 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
de8b34a70e
commit
d66223fd99
@ -420,6 +420,9 @@ Branch-2 ( Unreleased changes )
|
|||||||
HADOOP-8720. TestLocalFileSystem should use test root subdirectory.
|
HADOOP-8720. TestLocalFileSystem should use test root subdirectory.
|
||||||
(Vlad Rozov via eli)
|
(Vlad Rozov via eli)
|
||||||
|
|
||||||
|
HADOOP-8721. ZKFC should not retry 45 times when attempting a graceful
|
||||||
|
fence during a failover. (Vinayakumar B via atm)
|
||||||
|
|
||||||
BREAKDOWN OF HDFS-3042 SUBTASKS
|
BREAKDOWN OF HDFS-3042 SUBTASKS
|
||||||
|
|
||||||
HADOOP-8220. ZKFailoverController doesn't handle failure to become active
|
HADOOP-8220. ZKFailoverController doesn't handle failure to become active
|
||||||
|
@ -154,6 +154,11 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
|
|||||||
"ha.failover-controller.graceful-fence.rpc-timeout.ms";
|
"ha.failover-controller.graceful-fence.rpc-timeout.ms";
|
||||||
public static final int HA_FC_GRACEFUL_FENCE_TIMEOUT_DEFAULT = 5000;
|
public static final int HA_FC_GRACEFUL_FENCE_TIMEOUT_DEFAULT = 5000;
|
||||||
|
|
||||||
|
/* FC connection retries for graceful fencing */
|
||||||
|
public static final String HA_FC_GRACEFUL_FENCE_CONNECTION_RETRIES =
|
||||||
|
"ha.failover-controller.graceful-fence.connection.retries";
|
||||||
|
public static final int HA_FC_GRACEFUL_FENCE_CONNECTION_RETRIES_DEFAULT = 1;
|
||||||
|
|
||||||
/* Timeout that the CLI (manual) FC waits for monitorHealth, getServiceState */
|
/* Timeout that the CLI (manual) FC waits for monitorHealth, getServiceState */
|
||||||
public static final String HA_FC_CLI_CHECK_TIMEOUT_KEY =
|
public static final String HA_FC_CLI_CHECK_TIMEOUT_KEY =
|
||||||
"ha.failover-controller.cli-check.rpc-timeout.ms";
|
"ha.failover-controller.cli-check.rpc-timeout.ms";
|
||||||
|
@ -49,16 +49,34 @@ public class FailoverController {
|
|||||||
private final int rpcTimeoutToNewActive;
|
private final int rpcTimeoutToNewActive;
|
||||||
|
|
||||||
private final Configuration conf;
|
private final Configuration conf;
|
||||||
|
/*
|
||||||
|
* Need a copy of conf for graceful fence to set
|
||||||
|
* configurable retries for IPC client.
|
||||||
|
* Refer to HDFS-3561
|
||||||
|
*/
|
||||||
|
private final Configuration gracefulFenceConf;
|
||||||
|
|
||||||
private final RequestSource requestSource;
|
private final RequestSource requestSource;
|
||||||
|
|
||||||
public FailoverController(Configuration conf,
|
public FailoverController(Configuration conf,
|
||||||
RequestSource source) {
|
RequestSource source) {
|
||||||
this.conf = conf;
|
this.conf = conf;
|
||||||
|
this.gracefulFenceConf = new Configuration(conf);
|
||||||
this.requestSource = source;
|
this.requestSource = source;
|
||||||
|
|
||||||
this.gracefulFenceTimeout = getGracefulFenceTimeout(conf);
|
this.gracefulFenceTimeout = getGracefulFenceTimeout(conf);
|
||||||
this.rpcTimeoutToNewActive = getRpcTimeoutToNewActive(conf);
|
this.rpcTimeoutToNewActive = getRpcTimeoutToNewActive(conf);
|
||||||
|
|
||||||
|
//Configure fewer retries for graceful fence
|
||||||
|
int gracefulFenceConnectRetries = conf.getInt(
|
||||||
|
CommonConfigurationKeys.HA_FC_GRACEFUL_FENCE_CONNECTION_RETRIES,
|
||||||
|
CommonConfigurationKeys.HA_FC_GRACEFUL_FENCE_CONNECTION_RETRIES_DEFAULT);
|
||||||
|
gracefulFenceConf.setInt(
|
||||||
|
CommonConfigurationKeys.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY,
|
||||||
|
gracefulFenceConnectRetries);
|
||||||
|
gracefulFenceConf.setInt(
|
||||||
|
CommonConfigurationKeys.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY,
|
||||||
|
gracefulFenceConnectRetries);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int getGracefulFenceTimeout(Configuration conf) {
|
static int getGracefulFenceTimeout(Configuration conf) {
|
||||||
@ -150,7 +168,7 @@ private StateChangeRequestInfo createReqInfo() {
|
|||||||
boolean tryGracefulFence(HAServiceTarget svc) {
|
boolean tryGracefulFence(HAServiceTarget svc) {
|
||||||
HAServiceProtocol proxy = null;
|
HAServiceProtocol proxy = null;
|
||||||
try {
|
try {
|
||||||
proxy = svc.getProxy(conf, gracefulFenceTimeout);
|
proxy = svc.getProxy(gracefulFenceConf, gracefulFenceTimeout);
|
||||||
proxy.transitionToStandby(createReqInfo());
|
proxy.transitionToStandby(createReqInfo());
|
||||||
return true;
|
return true;
|
||||||
} catch (ServiceFailedException sfe) {
|
} catch (ServiceFailedException sfe) {
|
||||||
|
Loading…
Reference in New Issue
Block a user