HDFS-15023. [SBN read] ZKFC should check the state before joining the election. Contributed by Fei Hui.

This commit is contained in:
Ayush Saxena 2019-12-05 18:52:01 +05:30
parent 682e6fdeda
commit 83a14559e5
3 changed files with 47 additions and 3 deletions

View File

@ -579,6 +579,11 @@ public synchronized void processResult(int rc, String path, Object ctx,
fatalError(errorMessage);
}
/**
 * Exposes the elector's {@code wantToBeInElection} flag so that tests
 * can inspect it.
 *
 * @return the current value of {@code wantToBeInElection}
 */
@VisibleForTesting
public boolean getWantToBeInElection() {
  return this.wantToBeInElection;
}
/**
* We failed to become active. Re-join the election, but
* sleep for a few seconds after terminating our existing

View File

@ -157,7 +157,10 @@ public HAServiceTarget getLocalTarget() {
return localTarget;
}
HAServiceState getServiceState() { return serviceState; }
/**
 * Exposes the controller's {@code serviceState} field so that tests
 * can inspect it.
 *
 * @return the current value of {@code serviceState}
 */
@VisibleForTesting
public HAServiceState getServiceState() {
  return this.serviceState;
}
public int run(final String[] args) throws Exception {
if (!localTarget.isAutoFailoverEnabled()) {
@ -799,7 +802,9 @@ private void recheckElectability() {
switch (lastHealthState) {
case SERVICE_HEALTHY:
elector.joinElection(targetToData(localTarget));
if(serviceState != HAServiceState.OBSERVER) {
elector.joinElection(targetToData(localTarget));
}
if (quitElectionOnBadState) {
quitElectionOnBadState = false;
}
@ -909,7 +914,7 @@ protected synchronized void setLastHealthState(HealthMonitor.State newState) {
}
@VisibleForTesting
ActiveStandbyElector getElectorForTests() {
public ActiveStandbyElector getElectorForTests() {
return elector;
}

View File

@ -253,6 +253,32 @@ public void testManualFailoverWithDFSHAAdmin() throws Exception {
waitForHAState(1, HAServiceState.STANDBY);
}
/**
 * Verifies that the ZKFC of a NameNode in observer state does not want
 * to be in the election.
 *
 * The test transitions nn2 to observer through {@link DFSHAAdmin}, waits
 * until both the NameNode and its ZKFC report the observer state, pokes
 * the ZKFC via {@code cedeActive}, and finally checks the elector's
 * {@code wantToBeInElection} flag.
 *
 * @throws Exception if the admin command or any of the waits fail
 */
@Test(timeout=30000)
public void testElectionOnObserver() throws Exception {
  // System.in is replaced below; remember the real stream so that the
  // finally block can put it back no matter how the test ends.
  final InputStream originalIn = System.in;
  try {
    DFSHAAdmin tool = new DFSHAAdmin();
    tool.setConf(conf);

    // Transition nn2 to Observer. The "yes" line is supplied on stdin
    // (presumably answering a confirmation prompt for -forcemanual;
    // confirm against DFSHAAdmin). Encode it with an explicit charset
    // so the bytes do not depend on the platform default.
    System.setIn(new ByteArrayInputStream(
        "yes\n".getBytes(java.nio.charset.StandardCharsets.UTF_8)));
    int result = tool.run(
        new String[]{"-transitionToObserver", "-forcemanual", "nn2"});
    assertEquals("State transition returned: " + result, 0, result);

    // Wait for the NameNode at index 1, and then its ZKFC, to report
    // the observer state before exercising the election logic.
    waitForHAState(1, HAServiceState.OBSERVER);
    waitForZKFCState(thr2.zkfc, HAServiceState.OBSERVER);

    // Call recheckElectability
    thr2.zkfc.getLocalTarget().getZKFCProxy(conf, 15000).cedeActive(-1);

    // This namenode is in observer state, it shouldn't join election
    assertEquals("ZKFC of an observer should not want to be in election",
        false, thr2.zkfc.getElectorForTests().getWantToBeInElection());
  } finally {
    System.setIn(originalIn);
  }
}
private void waitForHAState(int nnidx, final HAServiceState state)
throws TimeoutException, InterruptedException {
final NameNode nn = cluster.getNameNode(nnidx);
@ -269,6 +295,14 @@ public Boolean get() {
}, 50, 15000);
}
/**
 * Blocks until the given failover controller reports the expected
 * service state.
 *
 * The numeric arguments handed to {@code GenericTestUtils.waitFor} are
 * presumably the polling interval (50) and the overall timeout (15000)
 * in milliseconds; confirm against that helper.
 *
 * @param zkfc the failover controller whose service state is polled
 * @param state the state to wait for
 * @throws TimeoutException declared for the underlying wait helper
 * @throws InterruptedException declared for the underlying wait helper
 */
private void waitForZKFCState(DFSZKFailoverController zkfc,
    final HAServiceState state)
    throws TimeoutException, InterruptedException {
  // Enum constants are singletons, so an identity comparison is enough.
  GenericTestUtils.waitFor(() -> state == zkfc.getServiceState(), 50, 15000);
}
/**
* Test-thread which runs a ZK Failover Controller corresponding
* to a given NameNode in the minicluster.