HADOOP-8220. ZKFailoverController doesn't handle failure to become active correctly. Contributed by Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-3042@1307596 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Todd Lipcon 2012-03-30 20:23:59 +00:00
parent 04416861eb
commit 6693167028
6 changed files with 179 additions and 29 deletions

View File

@ -0,0 +1,8 @@
Changes for HDFS-3042 branch.
This change list will be merged into the trunk CHANGES.txt when the HDFS-3-42
branch is merged.
------------------------------
HADOOP-8220. ZKFailoverController doesn't handle failure to become active correctly (todd)

View File

@ -81,9 +81,15 @@ public class ActiveStandbyElector implements StatCallback, StringCallback {
*/ */
public interface ActiveStandbyElectorCallback { public interface ActiveStandbyElectorCallback {
/** /**
* This method is called when the app becomes the active leader * This method is called when the app becomes the active leader.
* If the service fails to become active, it should throw
* ServiceFailedException. This will cause the elector to
* sleep for a short period, then re-join the election.
*
* Callback implementations are expected to manage their own
* timeouts (e.g. when making an RPC to a remote node).
*/ */
void becomeActive(); void becomeActive() throws ServiceFailedException;
/** /**
* This method is called when the app becomes a standby * This method is called when the app becomes a standby
@ -135,6 +141,7 @@ public interface ActiveStandbyElectorCallback {
public static final Log LOG = LogFactory.getLog(ActiveStandbyElector.class); public static final Log LOG = LogFactory.getLog(ActiveStandbyElector.class);
private static final int NUM_RETRIES = 3; private static final int NUM_RETRIES = 3;
private static final int SLEEP_AFTER_FAILURE_TO_BECOME_ACTIVE = 1000;
private static enum ConnectionState { private static enum ConnectionState {
DISCONNECTED, CONNECTED, TERMINATED DISCONNECTED, CONNECTED, TERMINATED
@ -385,8 +392,11 @@ public synchronized void processResult(int rc, String path, Object ctx,
Code code = Code.get(rc); Code code = Code.get(rc);
if (isSuccess(code)) { if (isSuccess(code)) {
// we successfully created the znode. we are the leader. start monitoring // we successfully created the znode. we are the leader. start monitoring
becomeActive(); if (becomeActive()) {
monitorActiveStatus(); monitorActiveStatus();
} else {
reJoinElectionAfterFailureToBecomeActive();
}
return; return;
} }
@ -442,7 +452,9 @@ public synchronized void processResult(int rc, String path, Object ctx,
// creation was retried // creation was retried
if (stat.getEphemeralOwner() == zkClient.getSessionId()) { if (stat.getEphemeralOwner() == zkClient.getSessionId()) {
// we own the lock znode. so we are the leader // we own the lock znode. so we are the leader
becomeActive(); if (!becomeActive()) {
reJoinElectionAfterFailureToBecomeActive();
}
} else { } else {
// we dont own the lock znode. so we are a standby. // we dont own the lock znode. so we are a standby.
becomeStandby(); becomeStandby();
@ -479,6 +491,17 @@ public synchronized void processResult(int rc, String path, Object ctx,
fatalError(errorMessage); fatalError(errorMessage);
} }
/**
* We failed to become active. Re-join the election, but
* sleep for a few seconds after terminating our existing
* session, so that other nodes have a chance to become active.
* The failure to become active is already logged inside
* becomeActive().
*/
private void reJoinElectionAfterFailureToBecomeActive() {
reJoinElection(SLEEP_AFTER_FAILURE_TO_BECOME_ACTIVE);
}
/** /**
* interface implementation of Zookeeper watch events (connection and node), * interface implementation of Zookeeper watch events (connection and node),
* proxied by {@link WatcherWithClientRef}. * proxied by {@link WatcherWithClientRef}.
@ -516,7 +539,7 @@ synchronized void processWatchEvent(ZooKeeper zk, WatchedEvent event) {
// call listener to reconnect // call listener to reconnect
LOG.info("Session expired. Entering neutral mode and rejoining..."); LOG.info("Session expired. Entering neutral mode and rejoining...");
enterNeutralMode(); enterNeutralMode();
reJoinElection(); reJoinElection(0);
break; break;
default: default:
fatalError("Unexpected Zookeeper watch event state: " fatalError("Unexpected Zookeeper watch event state: "
@ -591,7 +614,7 @@ private void joinElectionInternal() {
createLockNodeAsync(); createLockNodeAsync();
} }
private void reJoinElection() { private void reJoinElection(int sleepTime) {
LOG.info("Trying to re-establish ZK session"); LOG.info("Trying to re-establish ZK session");
// Some of the test cases rely on expiring the ZK sessions and // Some of the test cases rely on expiring the ZK sessions and
@ -604,12 +627,30 @@ private void reJoinElection() {
sessionReestablishLockForTests.lock(); sessionReestablishLockForTests.lock();
try { try {
terminateConnection(); terminateConnection();
sleepFor(sleepTime);
joinElectionInternal(); joinElectionInternal();
} finally { } finally {
sessionReestablishLockForTests.unlock(); sessionReestablishLockForTests.unlock();
} }
} }
/**
* Sleep for the given number of milliseconds.
* This is non-static, and separated out, so that unit tests
* can override the behavior not to sleep.
*/
@VisibleForTesting
protected void sleepFor(int sleepMs) {
if (sleepMs > 0) {
try {
Thread.sleep(sleepMs);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
}
}
@VisibleForTesting @VisibleForTesting
void preventSessionReestablishmentForTests() { void preventSessionReestablishmentForTests() {
sessionReestablishLockForTests.lock(); sessionReestablishLockForTests.lock();
@ -640,11 +681,7 @@ private boolean reEstablishSession() {
success = true; success = true;
} catch(IOException e) { } catch(IOException e) {
LOG.warn(e); LOG.warn(e);
try { sleepFor(5000);
Thread.sleep(5000);
} catch(InterruptedException e1) {
LOG.warn(e1);
}
} }
++connectionRetryCount; ++connectionRetryCount;
} }
@ -675,20 +712,24 @@ private void reset() {
terminateConnection(); terminateConnection();
} }
private void becomeActive() { private boolean becomeActive() {
assert wantToBeInElection; assert wantToBeInElection;
if (state != State.ACTIVE) { if (state == State.ACTIVE) {
try { // already active
Stat oldBreadcrumbStat = fenceOldActive(); return true;
writeBreadCrumbNode(oldBreadcrumbStat); }
} catch (Exception e) { try {
LOG.warn("Exception handling the winning of election", e); Stat oldBreadcrumbStat = fenceOldActive();
reJoinElection(); writeBreadCrumbNode(oldBreadcrumbStat);
return;
}
LOG.debug("Becoming active"); LOG.debug("Becoming active");
state = State.ACTIVE;
appClient.becomeActive(); appClient.becomeActive();
state = State.ACTIVE;
return true;
} catch (Exception e) {
LOG.warn("Exception handling the winning of election", e);
// Caller will handle quitting and rejoining the election.
return false;
} }
} }

View File

@ -242,15 +242,20 @@ private synchronized void fatalError(String err) {
notifyAll(); notifyAll();
} }
private synchronized void becomeActive() { private synchronized void becomeActive() throws ServiceFailedException {
LOG.info("Trying to make " + localTarget + " active..."); LOG.info("Trying to make " + localTarget + " active...");
try { try {
localTarget.getProxy().transitionToActive(); HAServiceProtocolHelper.transitionToActive(localTarget.getProxy());
LOG.info("Successfully transitioned " + localTarget + LOG.info("Successfully transitioned " + localTarget +
" to active state"); " to active state");
} catch (Throwable t) { } catch (Throwable t) {
LOG.fatal("Couldn't make " + localTarget + " active", t); LOG.fatal("Couldn't make " + localTarget + " active", t);
elector.quitElection(true); if (t instanceof ServiceFailedException) {
throw (ServiceFailedException)t;
} else {
throw new ServiceFailedException("Couldn't transition to active",
t);
}
/* /*
* TODO: * TODO:
* we need to make sure that if we get fenced and then quickly restarted, * we need to make sure that if we get fenced and then quickly restarted,
@ -297,7 +302,7 @@ ActiveStandbyElector getElectorForTests() {
*/ */
class ElectorCallbacks implements ActiveStandbyElectorCallback { class ElectorCallbacks implements ActiveStandbyElectorCallback {
@Override @Override
public void becomeActive() { public void becomeActive() throws ServiceFailedException {
ZKFailoverController.this.becomeActive(); ZKFailoverController.this.becomeActive();
} }

View File

@ -19,16 +19,25 @@
import java.util.Arrays; import java.util.Arrays;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.test.MultithreadedTestUtil.TestContext; import org.apache.hadoop.test.MultithreadedTestUtil.TestContext;
import org.apache.hadoop.util.StringUtils;
import org.apache.zookeeper.KeeperException.NoNodeException; import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.data.Stat; import org.apache.zookeeper.data.Stat;
import org.apache.zookeeper.server.ZooKeeperServer; import org.apache.zookeeper.server.ZooKeeperServer;
public abstract class ActiveStandbyElectorTestUtil { public abstract class ActiveStandbyElectorTestUtil {
private static final Log LOG = LogFactory.getLog(
ActiveStandbyElectorTestUtil.class);
private static final long LOG_INTERVAL_MS = 500;
public static void waitForActiveLockData(TestContext ctx, public static void waitForActiveLockData(TestContext ctx,
ZooKeeperServer zks, String parentDir, byte[] activeData) ZooKeeperServer zks, String parentDir, byte[] activeData)
throws Exception { throws Exception {
long st = System.currentTimeMillis();
long lastPrint = st;
while (true) { while (true) {
if (ctx != null) { if (ctx != null) {
ctx.checkException(); ctx.checkException();
@ -42,10 +51,18 @@ public static void waitForActiveLockData(TestContext ctx,
Arrays.equals(activeData, data)) { Arrays.equals(activeData, data)) {
return; return;
} }
if (System.currentTimeMillis() > lastPrint + LOG_INTERVAL_MS) {
LOG.info("Cur data: " + StringUtils.byteToHexString(data));
lastPrint = System.currentTimeMillis();
}
} catch (NoNodeException nne) { } catch (NoNodeException nne) {
if (activeData == null) { if (activeData == null) {
return; return;
} }
if (System.currentTimeMillis() > lastPrint + LOG_INTERVAL_MS) {
LOG.info("Cur data: no node");
lastPrint = System.currentTimeMillis();
}
} }
Thread.sleep(50); Thread.sleep(50);
} }

View File

@ -51,6 +51,8 @@ public class TestActiveStandbyElector {
private ActiveStandbyElectorTester elector; private ActiveStandbyElectorTester elector;
class ActiveStandbyElectorTester extends ActiveStandbyElector { class ActiveStandbyElectorTester extends ActiveStandbyElector {
private int sleptFor = 0;
ActiveStandbyElectorTester(String hostPort, int timeout, String parent, ActiveStandbyElectorTester(String hostPort, int timeout, String parent,
List<ACL> acl, ActiveStandbyElectorCallback app) throws IOException { List<ACL> acl, ActiveStandbyElectorCallback app) throws IOException {
super(hostPort, timeout, parent, acl, app); super(hostPort, timeout, parent, acl, app);
@ -61,6 +63,14 @@ public ZooKeeper getNewZooKeeper() {
++count; ++count;
return mockZK; return mockZK;
} }
@Override
protected void sleepFor(int ms) {
// don't sleep in unit tests! Instead, just record the amount of
// time slept
LOG.info("Would have slept for " + ms + "ms");
sleptFor += ms;
}
} }
private static final String ZK_PARENT_NAME = "/parent/node"; private static final String ZK_PARENT_NAME = "/parent/node";
@ -146,6 +156,68 @@ public void testCreateNodeResultBecomeActive() throws Exception {
verifyExistCall(1); verifyExistCall(1);
} }
/**
* Verify that, when the callback fails to enter active state,
* the elector rejoins the election after sleeping for a short period.
*/
@Test
public void testFailToBecomeActive() throws Exception {
mockNoPriorActive();
elector.joinElection(data);
Assert.assertEquals(0, elector.sleptFor);
Mockito.doThrow(new ServiceFailedException("failed to become active"))
.when(mockApp).becomeActive();
elector.processResult(Code.OK.intValue(), ZK_LOCK_NAME, mockZK,
ZK_LOCK_NAME);
// Should have tried to become active
Mockito.verify(mockApp).becomeActive();
// should re-join
Mockito.verify(mockZK, Mockito.times(2)).create(ZK_LOCK_NAME, data,
Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL, elector, mockZK);
Assert.assertEquals(2, count);
Assert.assertTrue(elector.sleptFor > 0);
}
/**
* Verify that, when the callback fails to enter active state, after
* a ZK disconnect (i.e from the StatCallback), that the elector rejoins
* the election after sleeping for a short period.
*/
@Test
public void testFailToBecomeActiveAfterZKDisconnect() throws Exception {
mockNoPriorActive();
elector.joinElection(data);
Assert.assertEquals(0, elector.sleptFor);
elector.processResult(Code.CONNECTIONLOSS.intValue(), ZK_LOCK_NAME, mockZK,
ZK_LOCK_NAME);
Mockito.verify(mockZK, Mockito.times(2)).create(ZK_LOCK_NAME, data,
Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL, elector, mockZK);
elector.processResult(Code.NODEEXISTS.intValue(), ZK_LOCK_NAME, mockZK,
ZK_LOCK_NAME);
verifyExistCall(1);
Stat stat = new Stat();
stat.setEphemeralOwner(1L);
Mockito.when(mockZK.getSessionId()).thenReturn(1L);
// Fake failure to become active from within the stat callback
Mockito.doThrow(new ServiceFailedException("fail to become active"))
.when(mockApp).becomeActive();
elector.processResult(Code.OK.intValue(), ZK_LOCK_NAME, mockZK, stat);
Mockito.verify(mockApp, Mockito.times(1)).becomeActive();
// should re-join
Mockito.verify(mockZK, Mockito.times(3)).create(ZK_LOCK_NAME, data,
Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL, elector, mockZK);
Assert.assertEquals(2, count);
Assert.assertTrue(elector.sleptFor > 0);
}
/** /**
* Verify that, if there is a record of a prior active node, the * Verify that, if there is a record of a prior active node, the
* elector asks the application to fence it before becoming active. * elector asks the application to fence it before becoming active.

View File

@ -273,7 +273,8 @@ public void testBecomingActiveFails() throws Exception {
waitForHealthState(thr1.zkfc, State.SERVICE_UNHEALTHY); waitForHealthState(thr1.zkfc, State.SERVICE_UNHEALTHY);
waitForActiveLockHolder(null); waitForActiveLockHolder(null);
Mockito.verify(svc2.proxy).transitionToActive(); Mockito.verify(svc2.proxy, Mockito.timeout(2000).atLeastOnce())
.transitionToActive();
waitForHAState(svc1, HAServiceState.STANDBY); waitForHAState(svc1, HAServiceState.STANDBY);
waitForHAState(svc2, HAServiceState.STANDBY); waitForHAState(svc2, HAServiceState.STANDBY);
@ -283,6 +284,12 @@ public void testBecomingActiveFails() throws Exception {
waitForHAState(svc1, HAServiceState.ACTIVE); waitForHAState(svc1, HAServiceState.ACTIVE);
waitForHAState(svc2, HAServiceState.STANDBY); waitForHAState(svc2, HAServiceState.STANDBY);
waitForActiveLockHolder(svc1); waitForActiveLockHolder(svc1);
// Ensure that we can fail back to thr2 once it it is able
// to become active (e.g the admin has restarted it)
LOG.info("Allowing svc2 to become active, expiring svc1");
svc2.failToBecomeActive = false;
expireAndVerifyFailover(thr1, thr2);
} finally { } finally {
stopFCs(); stopFCs();
} }