HADOOP-9220. Unnecessary transition to standby in ActiveStandbyElector. Contributed by Tom White and Todd Lipcon.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1482401 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e29170e771
commit
d5a6e764dc
@ -720,6 +720,9 @@ Release 2.0.5-beta - UNRELEASED
|
|||||||
HADOOP-9307. BufferedFSInputStream.read returns wrong results
|
HADOOP-9307. BufferedFSInputStream.read returns wrong results
|
||||||
after certain seeks. (todd)
|
after certain seeks. (todd)
|
||||||
|
|
||||||
|
HADOOP-9220. Unnecessary transition to standby in ActiveStandbyElector.
|
||||||
|
(tom and todd via todd)
|
||||||
|
|
||||||
Release 2.0.4-alpha - 2013-04-25
|
Release 2.0.4-alpha - 2013-04-25
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
@ -159,6 +159,7 @@ static enum State {
|
|||||||
private int createRetryCount = 0;
|
private int createRetryCount = 0;
|
||||||
private int statRetryCount = 0;
|
private int statRetryCount = 0;
|
||||||
private ZooKeeper zkClient;
|
private ZooKeeper zkClient;
|
||||||
|
private WatcherWithClientRef watcher;
|
||||||
private ConnectionState zkConnectionState = ConnectionState.TERMINATED;
|
private ConnectionState zkConnectionState = ConnectionState.TERMINATED;
|
||||||
|
|
||||||
private final ActiveStandbyElectorCallback appClient;
|
private final ActiveStandbyElectorCallback appClient;
|
||||||
@ -246,6 +247,11 @@ public synchronized void joinElection(byte[] data)
|
|||||||
if (data == null) {
|
if (data == null) {
|
||||||
throw new HadoopIllegalArgumentException("data cannot be null");
|
throw new HadoopIllegalArgumentException("data cannot be null");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (wantToBeInElection) {
|
||||||
|
LOG.info("Already in election. Not re-connecting.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
appData = new byte[data.length];
|
appData = new byte[data.length];
|
||||||
System.arraycopy(data, 0, appData, 0, data.length);
|
System.arraycopy(data, 0, appData, 0, data.length);
|
||||||
@ -615,7 +621,7 @@ protected synchronized ZooKeeper getNewZooKeeper() throws IOException,
|
|||||||
// watcher after constructing ZooKeeper, we may miss that event. Instead,
|
// watcher after constructing ZooKeeper, we may miss that event. Instead,
|
||||||
// we construct the watcher first, and have it block any events it receives
|
// we construct the watcher first, and have it block any events it receives
|
||||||
// before we can set its ZooKeeper reference.
|
// before we can set its ZooKeeper reference.
|
||||||
WatcherWithClientRef watcher = new WatcherWithClientRef();
|
watcher = new WatcherWithClientRef();
|
||||||
ZooKeeper zk = new ZooKeeper(zkHostPort, zkSessionTimeout, watcher);
|
ZooKeeper zk = new ZooKeeper(zkHostPort, zkSessionTimeout, watcher);
|
||||||
watcher.setZooKeeperRef(zk);
|
watcher.setZooKeeperRef(zk);
|
||||||
|
|
||||||
@ -753,6 +759,7 @@ private void createConnection() throws IOException, KeeperException {
|
|||||||
e);
|
e);
|
||||||
}
|
}
|
||||||
zkClient = null;
|
zkClient = null;
|
||||||
|
watcher = null;
|
||||||
}
|
}
|
||||||
zkClient = getNewZooKeeper();
|
zkClient = getNewZooKeeper();
|
||||||
LOG.debug("Created new connection for " + this);
|
LOG.debug("Created new connection for " + this);
|
||||||
@ -765,12 +772,14 @@ void terminateConnection() {
|
|||||||
LOG.debug("Terminating ZK connection for " + this);
|
LOG.debug("Terminating ZK connection for " + this);
|
||||||
ZooKeeper tempZk = zkClient;
|
ZooKeeper tempZk = zkClient;
|
||||||
zkClient = null;
|
zkClient = null;
|
||||||
|
watcher = null;
|
||||||
try {
|
try {
|
||||||
tempZk.close();
|
tempZk.close();
|
||||||
} catch(InterruptedException e) {
|
} catch(InterruptedException e) {
|
||||||
LOG.warn(e);
|
LOG.warn(e);
|
||||||
}
|
}
|
||||||
zkConnectionState = ConnectionState.TERMINATED;
|
zkConnectionState = ConnectionState.TERMINATED;
|
||||||
|
wantToBeInElection = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void reset() {
|
private void reset() {
|
||||||
@ -914,7 +923,7 @@ private void createLockNodeAsync() {
|
|||||||
|
|
||||||
private void monitorLockNodeAsync() {
|
private void monitorLockNodeAsync() {
|
||||||
zkClient.exists(zkLockFilePath,
|
zkClient.exists(zkLockFilePath,
|
||||||
new WatcherWithClientRef(zkClient), this,
|
watcher, this,
|
||||||
zkClient);
|
zkClient);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1015,13 +1024,6 @@ private final class WatcherWithClientRef implements Watcher {
|
|||||||
* Latch used to wait until the reference to ZooKeeper is set.
|
* Latch used to wait until the reference to ZooKeeper is set.
|
||||||
*/
|
*/
|
||||||
private CountDownLatch hasSetZooKeeper = new CountDownLatch(1);
|
private CountDownLatch hasSetZooKeeper = new CountDownLatch(1);
|
||||||
|
|
||||||
private WatcherWithClientRef() {
|
|
||||||
}
|
|
||||||
|
|
||||||
private WatcherWithClientRef(ZooKeeper zk) {
|
|
||||||
setZooKeeperRef(zk);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Waits for the next event from ZooKeeper to arrive.
|
* Waits for the next event from ZooKeeper to arrive.
|
||||||
|
@ -49,6 +49,7 @@ class DummyHAService extends HAServiceTarget {
|
|||||||
|
|
||||||
DummySharedResource sharedResource;
|
DummySharedResource sharedResource;
|
||||||
public int fenceCount = 0;
|
public int fenceCount = 0;
|
||||||
|
public int activeTransitionCount = 0;
|
||||||
|
|
||||||
static ArrayList<DummyHAService> instances = Lists.newArrayList();
|
static ArrayList<DummyHAService> instances = Lists.newArrayList();
|
||||||
int index;
|
int index;
|
||||||
@ -139,6 +140,7 @@ public void monitorHealth() throws HealthCheckFailedException,
|
|||||||
@Override
|
@Override
|
||||||
public void transitionToActive(StateChangeRequestInfo req) throws ServiceFailedException,
|
public void transitionToActive(StateChangeRequestInfo req) throws ServiceFailedException,
|
||||||
AccessControlException, IOException {
|
AccessControlException, IOException {
|
||||||
|
activeTransitionCount++;
|
||||||
checkUnreachable();
|
checkUnreachable();
|
||||||
if (failToBecomeActive) {
|
if (failToBecomeActive) {
|
||||||
throw new ServiceFailedException("injected failure");
|
throw new ServiceFailedException("injected failure");
|
||||||
|
@ -422,7 +422,7 @@ public void testCedeActive() throws Exception {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout=15000)
|
@Test(timeout=25000)
|
||||||
public void testGracefulFailover() throws Exception {
|
public void testGracefulFailover() throws Exception {
|
||||||
try {
|
try {
|
||||||
cluster.start();
|
cluster.start();
|
||||||
@ -430,11 +430,16 @@ public void testGracefulFailover() throws Exception {
|
|||||||
cluster.waitForActiveLockHolder(0);
|
cluster.waitForActiveLockHolder(0);
|
||||||
cluster.getService(1).getZKFCProxy(conf, 5000).gracefulFailover();
|
cluster.getService(1).getZKFCProxy(conf, 5000).gracefulFailover();
|
||||||
cluster.waitForActiveLockHolder(1);
|
cluster.waitForActiveLockHolder(1);
|
||||||
|
|
||||||
cluster.getService(0).getZKFCProxy(conf, 5000).gracefulFailover();
|
cluster.getService(0).getZKFCProxy(conf, 5000).gracefulFailover();
|
||||||
cluster.waitForActiveLockHolder(0);
|
cluster.waitForActiveLockHolder(0);
|
||||||
|
|
||||||
|
Thread.sleep(10000); // allow to quiesce
|
||||||
|
|
||||||
assertEquals(0, cluster.getService(0).fenceCount);
|
assertEquals(0, cluster.getService(0).fenceCount);
|
||||||
assertEquals(0, cluster.getService(1).fenceCount);
|
assertEquals(0, cluster.getService(1).fenceCount);
|
||||||
|
assertEquals(2, cluster.getService(0).activeTransitionCount);
|
||||||
|
assertEquals(1, cluster.getService(1).activeTransitionCount);
|
||||||
} finally {
|
} finally {
|
||||||
cluster.stop();
|
cluster.stop();
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user