HADOOP-11149. TestZKFailoverController times out. Contributed by Steve Loughran. closes apache/hadoop#51
This commit is contained in:
parent
a4bd54f9d7
commit
053a511919
@ -1355,6 +1355,9 @@ Release 2.8.0 - UNRELEASED
|
||||
|
||||
HADOOP-8419. Fixed GzipCodec NPE reset for IBM JDK. (Yu Li via eyang)
|
||||
|
||||
HADOOP-11149. TestZKFailoverController times out. (Steve Loughran
|
||||
via ozawa)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
HADOOP-12051. ProtobufRpcEngine.invoke() should use Exception.toString()
|
||||
|
@ -133,11 +133,13 @@ public void start(int count) throws Exception {
|
||||
* @throws Exception if either of the services had encountered a fatal error
|
||||
*/
|
||||
public void stop() throws Exception {
|
||||
if (thrs != null) {
|
||||
for (DummyZKFCThread thr : thrs) {
|
||||
if (thr != null) {
|
||||
thr.interrupt();
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ctx != null) {
|
||||
ctx.stop();
|
||||
}
|
||||
|
@ -34,14 +34,23 @@
|
||||
import org.apache.zookeeper.ZooKeeper;
|
||||
import org.apache.zookeeper.data.Stat;
|
||||
import org.apache.zookeeper.server.auth.DigestAuthenticationProvider;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
import org.junit.rules.Timeout;
|
||||
import org.mockito.Mockito;
|
||||
|
||||
public class TestZKFailoverController extends ClientBaseWithFixes {
|
||||
private Configuration conf;
|
||||
private MiniZKFCCluster cluster;
|
||||
|
||||
/**
|
||||
* Set the timeout for every test
|
||||
*/
|
||||
@Rule
|
||||
public Timeout testTimeout = new Timeout(3 * 60 * 1000);
|
||||
|
||||
// Set up ZK digest-based credentials for the purposes of the tests,
|
||||
// to make sure all of our functionality works with auth and ACLs
|
||||
// present.
|
||||
@ -74,11 +83,21 @@ public void setupConfAndServices() {
|
||||
this.cluster = new MiniZKFCCluster(conf, getServer(serverFactory));
|
||||
}
|
||||
|
||||
@After
|
||||
public void teardown() {
|
||||
if (cluster != null) {
|
||||
try {
|
||||
cluster.stop();
|
||||
} catch (Exception e) {
|
||||
LOG.warn("When stopping the cluster", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Test that the various command lines for formatting the ZK directory
|
||||
* function correctly.
|
||||
*/
|
||||
@Test(timeout=15000)
|
||||
@Test
|
||||
public void testFormatZK() throws Exception {
|
||||
DummyHAService svc = cluster.getService(1);
|
||||
// Run without formatting the base dir,
|
||||
@ -101,7 +120,7 @@ public void testFormatZK() throws Exception {
|
||||
* Test that if ZooKeeper is not running, the correct error
|
||||
* code is returned.
|
||||
*/
|
||||
@Test(timeout=15000)
|
||||
@Test
|
||||
public void testNoZK() throws Exception {
|
||||
stopServer();
|
||||
DummyHAService svc = cluster.getService(1);
|
||||
@ -146,7 +165,7 @@ protected String getScopeInsideParentNode() {
|
||||
* Test that automatic failover won't run against a target that hasn't
|
||||
* explicitly enabled the feature.
|
||||
*/
|
||||
@Test(timeout=10000)
|
||||
@Test
|
||||
public void testWontRunWhenAutoFailoverDisabled() throws Exception {
|
||||
DummyHAService svc = cluster.getService(1);
|
||||
svc = Mockito.spy(svc);
|
||||
@ -162,7 +181,7 @@ public void testWontRunWhenAutoFailoverDisabled() throws Exception {
|
||||
* Test that, if ACLs are specified in the configuration,
|
||||
* it sets the ACLs when formatting the parent node.
|
||||
*/
|
||||
@Test(timeout=15000)
|
||||
@Test
|
||||
public void testFormatSetsAcls() throws Exception {
|
||||
// Format the base dir, should succeed
|
||||
DummyHAService svc = cluster.getService(1);
|
||||
@ -184,7 +203,7 @@ public void testFormatSetsAcls() throws Exception {
|
||||
* Test that the ZKFC won't run if fencing is not configured for the
|
||||
* local service.
|
||||
*/
|
||||
@Test(timeout=15000)
|
||||
@Test
|
||||
public void testFencingMustBeConfigured() throws Exception {
|
||||
DummyHAService svc = Mockito.spy(cluster.getService(0));
|
||||
Mockito.doThrow(new BadFencingConfigurationException("no fencing"))
|
||||
@ -202,9 +221,8 @@ public void testFencingMustBeConfigured() throws Exception {
|
||||
* transition is used when possible, falling back to fencing when
|
||||
* the graceful approach fails.
|
||||
*/
|
||||
@Test(timeout=15000)
|
||||
@Test
|
||||
public void testAutoFailoverOnBadHealth() throws Exception {
|
||||
try {
|
||||
cluster.start();
|
||||
DummyHAService svc1 = cluster.getService(1);
|
||||
|
||||
@ -224,9 +242,6 @@ public void testAutoFailoverOnBadHealth() throws Exception {
|
||||
cluster.waitForHAState(0, HAServiceState.ACTIVE);
|
||||
// and fence svc1
|
||||
Mockito.verify(svc1.fencer).fence(Mockito.same(svc1));
|
||||
} finally {
|
||||
cluster.stop();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -235,9 +250,8 @@ public void testAutoFailoverOnBadHealth() throws Exception {
|
||||
* transition is used when possible, falling back to fencing when
|
||||
* the graceful approach fails.
|
||||
*/
|
||||
@Test(timeout=15000)
|
||||
@Test
|
||||
public void testAutoFailoverOnBadState() throws Exception {
|
||||
try {
|
||||
cluster.start();
|
||||
DummyHAService svc0 = cluster.getService(0);
|
||||
LOG.info("Faking svc0 to change the state, should failover to svc1");
|
||||
@ -245,14 +259,10 @@ public void testAutoFailoverOnBadState() throws Exception {
|
||||
|
||||
// Should fail over to svc1 at this point
|
||||
cluster.waitForHAState(1, HAServiceState.ACTIVE);
|
||||
} finally {
|
||||
cluster.stop();
|
||||
}
|
||||
}
|
||||
|
||||
@Test(timeout=15000)
|
||||
@Test
|
||||
public void testAutoFailoverOnLostZKSession() throws Exception {
|
||||
try {
|
||||
cluster.start();
|
||||
|
||||
// Expire svc0, it should fail over to svc1
|
||||
@ -268,18 +278,14 @@ public void testAutoFailoverOnLostZKSession() throws Exception {
|
||||
|
||||
// Expire svc1, it should fail back to svc0
|
||||
cluster.expireAndVerifyFailover(1, 0);
|
||||
} finally {
|
||||
cluster.stop();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that, if the standby node is unhealthy, it doesn't try to become
|
||||
* active
|
||||
*/
|
||||
@Test(timeout=15000)
|
||||
@Test
|
||||
public void testDontFailoverToUnhealthyNode() throws Exception {
|
||||
try {
|
||||
cluster.start();
|
||||
|
||||
// Make svc1 unhealthy, and wait for its FC to notice the bad health.
|
||||
@ -304,18 +310,14 @@ public void testDontFailoverToUnhealthyNode() throws Exception {
|
||||
}
|
||||
// svc0 should get the lock again
|
||||
cluster.waitForActiveLockHolder(0);
|
||||
} finally {
|
||||
cluster.stop();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that the ZKFC successfully quits the election when it fails to
|
||||
* become active. This allows the old node to successfully fail back.
|
||||
*/
|
||||
@Test(timeout=15000)
|
||||
@Test
|
||||
public void testBecomingActiveFails() throws Exception {
|
||||
try {
|
||||
cluster.start();
|
||||
DummyHAService svc1 = cluster.getService(1);
|
||||
|
||||
@ -346,9 +348,6 @@ public void testBecomingActiveFails() throws Exception {
|
||||
LOG.info("Allowing svc1 to become active, expiring svc0");
|
||||
svc1.failToBecomeActive = false;
|
||||
cluster.expireAndVerifyFailover(0, 1);
|
||||
} finally {
|
||||
cluster.stop();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -356,9 +355,8 @@ public void testBecomingActiveFails() throws Exception {
|
||||
* current state, without triggering any failovers, and without
|
||||
* causing the active node to enter standby state.
|
||||
*/
|
||||
@Test(timeout=15000)
|
||||
@Test
|
||||
public void testZooKeeperFailure() throws Exception {
|
||||
try {
|
||||
cluster.start();
|
||||
|
||||
// Record initial ZK sessions
|
||||
@ -396,17 +394,13 @@ public void testZooKeeperFailure() throws Exception {
|
||||
cluster.getElector(0).getZKSessionIdForTests());
|
||||
assertEquals(session1,
|
||||
cluster.getElector(1).getZKSessionIdForTests());
|
||||
} finally {
|
||||
cluster.stop();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that the ZKFC can gracefully cede its active status.
|
||||
*/
|
||||
@Test(timeout=15000)
|
||||
@Test
|
||||
public void testCedeActive() throws Exception {
|
||||
try {
|
||||
cluster.start();
|
||||
DummyZKFC zkfc = cluster.getZkfc(0);
|
||||
// It should be in the active state to start.
|
||||
@ -434,14 +428,10 @@ public void testCedeActive() throws Exception {
|
||||
assertTrue("Should take ~3 seconds to rejoin. Only took " + (et2 - et) +
|
||||
"ms before rejoining.",
|
||||
et2 - et > 2800);
|
||||
} finally {
|
||||
cluster.stop();
|
||||
}
|
||||
}
|
||||
|
||||
@Test(timeout=25000)
|
||||
@Test
|
||||
public void testGracefulFailover() throws Exception {
|
||||
try {
|
||||
cluster.start();
|
||||
|
||||
cluster.waitForActiveLockHolder(0);
|
||||
@ -457,14 +447,10 @@ public void testGracefulFailover() throws Exception {
|
||||
assertEquals(0, cluster.getService(1).fenceCount);
|
||||
assertEquals(2, cluster.getService(0).activeTransitionCount);
|
||||
assertEquals(1, cluster.getService(1).activeTransitionCount);
|
||||
} finally {
|
||||
cluster.stop();
|
||||
}
|
||||
}
|
||||
|
||||
@Test(timeout=15000)
|
||||
@Test
|
||||
public void testGracefulFailoverToUnhealthy() throws Exception {
|
||||
try {
|
||||
cluster.start();
|
||||
|
||||
cluster.waitForActiveLockHolder(0);
|
||||
@ -482,14 +468,10 @@ public void testGracefulFailoverToUnhealthy() throws Exception {
|
||||
cluster.getService(1).toString() +
|
||||
" is not currently healthy.", sfe);
|
||||
}
|
||||
} finally {
|
||||
cluster.stop();
|
||||
}
|
||||
}
|
||||
|
||||
@Test(timeout=15000)
|
||||
@Test
|
||||
public void testGracefulFailoverFailBecomingActive() throws Exception {
|
||||
try {
|
||||
cluster.start();
|
||||
|
||||
cluster.waitForActiveLockHolder(0);
|
||||
@ -513,14 +495,10 @@ public void testGracefulFailoverFailBecomingActive() throws Exception {
|
||||
|
||||
// Service 0 should go back to being active after the failed failover
|
||||
cluster.waitForActiveLockHolder(0);
|
||||
} finally {
|
||||
cluster.stop();
|
||||
}
|
||||
}
|
||||
|
||||
@Test(timeout=15000)
|
||||
@Test
|
||||
public void testGracefulFailoverFailBecomingStandby() throws Exception {
|
||||
try {
|
||||
cluster.start();
|
||||
|
||||
cluster.waitForActiveLockHolder(0);
|
||||
@ -533,15 +511,11 @@ public void testGracefulFailoverFailBecomingStandby() throws Exception {
|
||||
|
||||
// Check that the old node was fenced
|
||||
assertEquals(1, cluster.getService(0).fenceCount);
|
||||
} finally {
|
||||
cluster.stop();
|
||||
}
|
||||
}
|
||||
|
||||
@Test(timeout=15000)
|
||||
@Test
|
||||
public void testGracefulFailoverFailBecomingStandbyAndFailFence()
|
||||
throws Exception {
|
||||
try {
|
||||
cluster.start();
|
||||
|
||||
cluster.waitForActiveLockHolder(0);
|
||||
@ -559,18 +533,14 @@ public void testGracefulFailoverFailBecomingStandbyAndFailFence()
|
||||
GenericTestUtils.assertExceptionContains(
|
||||
"Unable to fence " + cluster.getService(0), sfe);
|
||||
}
|
||||
} finally {
|
||||
cluster.stop();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test which exercises all of the inputs into ZKFC. This is particularly
|
||||
* useful for running under jcarder to check for lock order violations.
|
||||
*/
|
||||
@Test(timeout=30000)
|
||||
@Test
|
||||
public void testOneOfEverything() throws Exception {
|
||||
try {
|
||||
cluster.start();
|
||||
|
||||
// Failover by session expiration
|
||||
@ -600,14 +570,10 @@ public void testOneOfEverything() throws Exception {
|
||||
// Graceful failovers
|
||||
cluster.getZkfc(1).gracefulFailoverToYou();
|
||||
cluster.getZkfc(0).gracefulFailoverToYou();
|
||||
} finally {
|
||||
cluster.stop();
|
||||
}
|
||||
}
|
||||
|
||||
@Test(timeout = 25000)
|
||||
@Test
|
||||
public void testGracefulFailoverMultipleZKfcs() throws Exception {
|
||||
try {
|
||||
cluster.start(3);
|
||||
|
||||
cluster.waitForActiveLockHolder(0);
|
||||
@ -632,9 +598,6 @@ public void testGracefulFailoverMultipleZKfcs() throws Exception {
|
||||
assertEquals(2, cluster.getService(0).activeTransitionCount);
|
||||
assertEquals(1, cluster.getService(1).activeTransitionCount);
|
||||
assertEquals(1, cluster.getService(2).activeTransitionCount);
|
||||
} finally {
|
||||
cluster.stop();
|
||||
}
|
||||
}
|
||||
|
||||
private int runFC(DummyHAService target, String ... args) throws Exception {
|
||||
|
Loading…
Reference in New Issue
Block a user