HDFS-2812. When becoming active, the NN should treat all leases as freshly renewed. Contributed by Todd Lipcon.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1233612 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
02919e61f6
commit
1aed1296dd
@@ -117,3 +117,5 @@ HDFS-2795. Standby NN takes a long time to recover from a dead DN starting up. (
|
||||
HDFS-2592. Balancer support for HA namenodes. (Uma Maheswara Rao G via todd)
|
||||
|
||||
HDFS-2367. Enable the configuration of multiple HA cluster addresses. (atm)
|
||||
|
||||
HDFS-2812. When becoming active, the NN should treat all leases as freshly renewed. (todd)
|
||||
|
@@ -337,6 +337,8 @@ private static final void logAuditEvent(UserGroupInformation ugi,
|
||||
*/
|
||||
private HAContext haContext;
|
||||
|
||||
private boolean haEnabled;
|
||||
|
||||
private final Configuration conf;
|
||||
|
||||
PendingDataNodeMessages getPendingDataNodeMessages() {
|
||||
@@ -545,6 +547,13 @@ void startActiveServices() throws IOException {
|
||||
if (UserGroupInformation.isSecurityEnabled()) {
|
||||
startSecretManager();
|
||||
}
|
||||
if (haEnabled) {
|
||||
// Renew all of the leases before becoming active.
|
||||
// This is because, while we were in standby mode,
|
||||
// the leases weren't getting renewed on this NN.
|
||||
// Give them all a fresh start here.
|
||||
leaseManager.renewAllLeases();
|
||||
}
|
||||
leaseManager.startMonitor();
|
||||
} finally {
|
||||
writeUnlock();
|
||||
@@ -737,8 +746,8 @@ private void setConfigurationParameters(Configuration conf)
|
||||
// block allocation has to be persisted in HA using a shared edits directory
|
||||
// so that the standby has up-to-date namespace information
|
||||
String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf);
|
||||
this.persistBlocks |= HAUtil.isHAEnabled(conf, nameserviceId) &&
|
||||
HAUtil.usesSharedEditsDir(conf);
|
||||
this.haEnabled = HAUtil.isHAEnabled(conf, nameserviceId);
|
||||
this.persistBlocks |= haEnabled && HAUtil.usesSharedEditsDir(conf);
|
||||
|
||||
short filePermission = (short)conf.getInt(DFS_NAMENODE_UPGRADE_PERMISSION_KEY,
|
||||
DFS_NAMENODE_UPGRADE_PERMISSION_DEFAULT);
|
||||
|
@@ -200,6 +200,15 @@ synchronized void renewLease(Lease lease) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Renew all of the currently open leases.
|
||||
*/
|
||||
synchronized void renewAllLeases() {
|
||||
for (Lease l : leases.values()) {
|
||||
renewLease(l);
|
||||
}
|
||||
}
|
||||
|
||||
/************************************************************
|
||||
* A Lease governs all the locks held by a single client.
|
||||
* For each client there's a corresponding lease, whose
|
||||
@@ -306,6 +315,11 @@ void replacePath(String oldpath, String newpath) {
|
||||
paths.remove(oldpath);
|
||||
paths.add(newpath);
|
||||
}
|
||||
|
||||
/** @return the {@code lastUpdate} timestamp of this lease's most recent renewal; exposed as a test hook. */
@VisibleForTesting
long getLastUpdate() {
  return lastUpdate;
}
|
||||
}
|
||||
|
||||
synchronized void changeLease(String src, String dst,
|
||||
|
@@ -28,6 +28,7 @@
|
||||
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp;
|
||||
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
|
||||
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
||||
import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
|
||||
import org.apache.hadoop.ipc.Server;
|
||||
@@ -126,6 +127,19 @@ public static String getLeaseHolderForPath(NameNode namenode, String path) {
|
||||
return namenode.getNamesystem().leaseManager.getLeaseByPath(path).getHolder();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the timestamp of the last renewal of the given lease,
|
||||
* or -1 in the case that the lease doesn't exist.
|
||||
*/
|
||||
public static long getLeaseRenewalTime(NameNode nn, String path) {
|
||||
LeaseManager lm = nn.getNamesystem().leaseManager;
|
||||
Lease l = lm.getLeaseByPath(path);
|
||||
if (l == null) {
|
||||
return -1;
|
||||
}
|
||||
return l.getLastUpdate();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the datanode descriptor for the given datanode.
|
||||
*/
|
||||
|
@@ -24,15 +24,19 @@
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hdfs.DFSTestUtil;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
|
||||
import org.apache.hadoop.io.IOUtils;
|
||||
import org.apache.hadoop.test.GenericTestUtils;
|
||||
import org.apache.hadoop.test.MultithreadedTestUtil.TestContext;
|
||||
import org.apache.hadoop.test.MultithreadedTestUtil.RepeatingTestThread;
|
||||
import org.apache.tools.ant.taskdefs.WaitFor;
|
||||
import org.junit.Test;
|
||||
import org.mockito.Mockito;
|
||||
|
||||
@@ -45,6 +49,7 @@ public class TestHAStateTransitions {
|
||||
TestStandbyIsHot.class);
|
||||
private static final Path TEST_DIR = new Path("/test");
|
||||
private static final Path TEST_FILE_PATH = new Path(TEST_DIR, "foo");
|
||||
private static final String TEST_FILE_STR = TEST_FILE_PATH.toUri().getPath();
|
||||
private static final String TEST_FILE_DATA =
|
||||
"Hello state transitioning world";
|
||||
|
||||
@@ -191,4 +196,59 @@ public void doAnAction() throws Exception {
|
||||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Test for HDFS-2812. Since lease renewals go from the client
 * only to the active NN, the SBN will have out-of-date lease
 * info when it becomes active. We need to make sure we don't
 * accidentally mark the leases as expired when the failover
 * proceeds.
 */
@Test(timeout=120000)
public void testLeasesRenewedOnTransition() throws Exception {
  Configuration conf = new Configuration();
  // Two-NN HA topology with a single DN.
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(1)
    .build();
  FSDataOutputStream stm = null;
  FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
  NameNode nn0 = cluster.getNameNode(0);
  NameNode nn1 = cluster.getNameNode(1);
  // NOTE(review): presumably shortens the standby's edit-tailing interval
  // so nn1 picks up nn0's edits quickly; the interrupt kicks the tailer
  // out of its current sleep. Confirm against EditLogTailer.
  nn1.getNamesystem().getEditLogTailer().setSleepTime(250);
  nn1.getNamesystem().getEditLogTailer().interrupt();

  try {
    cluster.waitActive();
    cluster.transitionToActive(0);

    LOG.info("Starting with NN 0 active");

    // Open a file on the active NN; this creates a lease held by this client.
    stm = fs.create(TEST_FILE_PATH);
    long nn0t0 = NameNodeAdapter.getLeaseRenewalTime(nn0, TEST_FILE_STR);
    assertTrue(nn0t0 > 0);
    // The standby hasn't tailed the create yet, so it has no lease
    // (getLeaseRenewalTime returns -1 for a missing lease).
    long nn1t0 = NameNodeAdapter.getLeaseRenewalTime(nn1, TEST_FILE_STR);
    assertEquals("Lease should not yet exist on nn1",
        -1, nn1t0);

    Thread.sleep(5); // make sure time advances!

    // Once the standby catches up on edits, it learns about the lease;
    // its renewal time must be no older than the active's original stamp.
    HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
    long nn1t1 = NameNodeAdapter.getLeaseRenewalTime(nn1, TEST_FILE_STR);
    assertTrue("Lease should have been created on standby. Time was: " +
        nn1t1, nn1t1 > nn0t0);

    Thread.sleep(5); // make sure time advances!

    LOG.info("Failing over to NN 1");
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    // Core of HDFS-2812: becoming active must renew all leases, so the
    // renewal time on nn1 moves forward rather than staying stale.
    long nn1t2 = NameNodeAdapter.getLeaseRenewalTime(nn1, TEST_FILE_STR);
    assertTrue("Lease should have been renewed by failover process",
        nn1t2 > nn1t1);
  } finally {
    // Close the stream and tear the cluster down even on assertion failure.
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user