From 8a4ff65b3cf5c9b2d544733956fe22e1d03aefe4 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Tue, 14 May 2013 06:43:47 +0000 Subject: [PATCH] HADOOP-9459. ActiveStandbyElector can join election even before Service HEALTHY, and results in null data at ActiveBreadCrumb. Contributed by Vinay and Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1482227 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 4 ++++ .../apache/hadoop/ha/ActiveStandbyElector.java | 14 ++++++++++++-- .../hadoop/ha/TestActiveStandbyElector.java | 18 ++++++++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 7593a5b6b3..29973aface 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -710,6 +710,10 @@ Release 2.0.5-beta - UNRELEASED HADOOP-9485. No default value in the code for hadoop.rpc.socket.factory.class.default. (Colin Patrick McCabe via atm) + HADOOP-9459. ActiveStandbyElector can join election even before + Service HEALTHY, and results in null data at ActiveBreadCrumb. + (Vinay and todd via todd) + Release 2.0.4-alpha - 2013-04-25 INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java index 6badc5e84c..b6d736e084 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java @@ -643,6 +643,8 @@ private void monitorActiveStatus() { } private void joinElectionInternal() { + Preconditions.checkState(appData != null, + "trying to join election without any app data"); if (zkClient == null) { if (!reEstablishSession()) { fatalError("Failed to reEstablish connection with ZooKeeper"); @@ -669,8 +671,14 @@ private void reJoinElection(int sleepTime) { try { terminateConnection(); sleepFor(sleepTime); - - joinElectionInternal(); + // Should not join election even before the SERVICE is reported + // as HEALTHY from ZKFC monitoring. + if (appData != null) { + joinElectionInternal(); + } else { + LOG.info("Not joining election since service has not yet been " + + "reported as healthy."); + } } finally { sessionReestablishLockForTests.unlock(); } @@ -798,6 +806,8 @@ private boolean becomeActive() { */ private void writeBreadCrumbNode(Stat oldBreadcrumbStat) throws KeeperException, InterruptedException { + Preconditions.checkState(appData != null, "no appdata"); + LOG.info("Writing znode " + zkBreadCrumbPath + " to indicate that the local node is the most recent active..."); if (oldBreadcrumbStat == null) { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElector.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElector.java index c2dc23abcc..309c7ad6ed 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElector.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElector.java @@ -721,4 +721,22 @@ public void testWithoutZKServer() throws Exception { GenericTestUtils.assertExceptionContains( "ConnectionLoss", ke); } } + + /** + * joinElection(..) should happen only after SERVICE_HEALTHY. + */ + @Test + public void testBecomeActiveBeforeServiceHealthy() throws Exception { + mockNoPriorActive(); + WatchedEvent mockEvent = Mockito.mock(WatchedEvent.class); + Mockito.when(mockEvent.getType()).thenReturn(Event.EventType.None); + // session expired should enter safe mode + // But for first time, before the SERVICE_HEALTY i.e. appData is set, + // should not enter the election. + Mockito.when(mockEvent.getState()).thenReturn(Event.KeeperState.Expired); + elector.processWatchEvent(mockZK, mockEvent); + // joinElection should not be called. + Mockito.verify(mockZK, Mockito.times(0)).create(ZK_LOCK_NAME, null, + Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL, elector, mockZK); + } }