HDFS-2812. When becoming active, the NN should treat all leases as freshly renewed. Contributed by Todd Lipcon.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1233612 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
02919e61f6
commit
1aed1296dd
@ -117,3 +117,5 @@ HDFS-2795. Standby NN takes a long time to recover from a dead DN starting up. (
|
|||||||
HDFS-2592. Balancer support for HA namenodes. (Uma Maheswara Rao G via todd)
|
HDFS-2592. Balancer support for HA namenodes. (Uma Maheswara Rao G via todd)
|
||||||
|
|
||||||
HDFS-2367. Enable the configuration of multiple HA cluster addresses. (atm)
|
HDFS-2367. Enable the configuration of multiple HA cluster addresses. (atm)
|
||||||
|
|
||||||
|
HDFS-2812. When becoming active, the NN should treat all leases as freshly renewed. (todd)
|
||||||
|
@ -337,6 +337,8 @@ private static final void logAuditEvent(UserGroupInformation ugi,
|
|||||||
*/
|
*/
|
||||||
private HAContext haContext;
|
private HAContext haContext;
|
||||||
|
|
||||||
|
private boolean haEnabled;
|
||||||
|
|
||||||
private final Configuration conf;
|
private final Configuration conf;
|
||||||
|
|
||||||
PendingDataNodeMessages getPendingDataNodeMessages() {
|
PendingDataNodeMessages getPendingDataNodeMessages() {
|
||||||
@ -545,6 +547,13 @@ void startActiveServices() throws IOException {
|
|||||||
if (UserGroupInformation.isSecurityEnabled()) {
|
if (UserGroupInformation.isSecurityEnabled()) {
|
||||||
startSecretManager();
|
startSecretManager();
|
||||||
}
|
}
|
||||||
|
if (haEnabled) {
|
||||||
|
// Renew all of the leases before becoming active.
|
||||||
|
// This is because, while we were in standby mode,
|
||||||
|
// the leases weren't getting renewed on this NN.
|
||||||
|
// Give them all a fresh start here.
|
||||||
|
leaseManager.renewAllLeases();
|
||||||
|
}
|
||||||
leaseManager.startMonitor();
|
leaseManager.startMonitor();
|
||||||
} finally {
|
} finally {
|
||||||
writeUnlock();
|
writeUnlock();
|
||||||
@ -737,8 +746,8 @@ private void setConfigurationParameters(Configuration conf)
|
|||||||
// block allocation has to be persisted in HA using a shared edits directory
|
// block allocation has to be persisted in HA using a shared edits directory
|
||||||
// so that the standby has up-to-date namespace information
|
// so that the standby has up-to-date namespace information
|
||||||
String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf);
|
String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf);
|
||||||
this.persistBlocks |= HAUtil.isHAEnabled(conf, nameserviceId) &&
|
this.haEnabled = HAUtil.isHAEnabled(conf, nameserviceId);
|
||||||
HAUtil.usesSharedEditsDir(conf);
|
this.persistBlocks |= haEnabled && HAUtil.usesSharedEditsDir(conf);
|
||||||
|
|
||||||
short filePermission = (short)conf.getInt(DFS_NAMENODE_UPGRADE_PERMISSION_KEY,
|
short filePermission = (short)conf.getInt(DFS_NAMENODE_UPGRADE_PERMISSION_KEY,
|
||||||
DFS_NAMENODE_UPGRADE_PERMISSION_DEFAULT);
|
DFS_NAMENODE_UPGRADE_PERMISSION_DEFAULT);
|
||||||
|
@ -200,6 +200,15 @@ synchronized void renewLease(Lease lease) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Renew all of the currently open leases.
|
||||||
|
*/
|
||||||
|
synchronized void renewAllLeases() {
|
||||||
|
for (Lease l : leases.values()) {
|
||||||
|
renewLease(l);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/************************************************************
|
/************************************************************
|
||||||
* A Lease governs all the locks held by a single client.
|
* A Lease governs all the locks held by a single client.
|
||||||
* For each client there's a corresponding lease, whose
|
* For each client there's a corresponding lease, whose
|
||||||
@ -306,6 +315,11 @@ void replacePath(String oldpath, String newpath) {
|
|||||||
paths.remove(oldpath);
|
paths.remove(oldpath);
|
||||||
paths.add(newpath);
|
paths.add(newpath);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
long getLastUpdate() {
|
||||||
|
return lastUpdate;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
synchronized void changeLease(String src, String dst,
|
synchronized void changeLease(String src, String dst,
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
|
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
|
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp;
|
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
|
import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
|
||||||
import org.apache.hadoop.ipc.Server;
|
import org.apache.hadoop.ipc.Server;
|
||||||
@ -126,6 +127,19 @@ public static String getLeaseHolderForPath(NameNode namenode, String path) {
|
|||||||
return namenode.getNamesystem().leaseManager.getLeaseByPath(path).getHolder();
|
return namenode.getNamesystem().leaseManager.getLeaseByPath(path).getHolder();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the timestamp of the last renewal of the given lease,
|
||||||
|
* or -1 in the case that the lease doesn't exist.
|
||||||
|
*/
|
||||||
|
public static long getLeaseRenewalTime(NameNode nn, String path) {
|
||||||
|
LeaseManager lm = nn.getNamesystem().leaseManager;
|
||||||
|
Lease l = lm.getLeaseByPath(path);
|
||||||
|
if (l == null) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return l.getLastUpdate();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the datanode descriptor for the given datanode.
|
* Return the datanode descriptor for the given datanode.
|
||||||
*/
|
*/
|
||||||
|
@ -24,15 +24,19 @@
|
|||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.hdfs.DFSTestUtil;
|
import org.apache.hadoop.hdfs.DFSTestUtil;
|
||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||||
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
|
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
|
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
|
||||||
|
import org.apache.hadoop.io.IOUtils;
|
||||||
import org.apache.hadoop.test.GenericTestUtils;
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
import org.apache.hadoop.test.MultithreadedTestUtil.TestContext;
|
import org.apache.hadoop.test.MultithreadedTestUtil.TestContext;
|
||||||
import org.apache.hadoop.test.MultithreadedTestUtil.RepeatingTestThread;
|
import org.apache.hadoop.test.MultithreadedTestUtil.RepeatingTestThread;
|
||||||
|
import org.apache.tools.ant.taskdefs.WaitFor;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.mockito.Mockito;
|
import org.mockito.Mockito;
|
||||||
|
|
||||||
@ -45,6 +49,7 @@ public class TestHAStateTransitions {
|
|||||||
TestStandbyIsHot.class);
|
TestStandbyIsHot.class);
|
||||||
private static final Path TEST_DIR = new Path("/test");
|
private static final Path TEST_DIR = new Path("/test");
|
||||||
private static final Path TEST_FILE_PATH = new Path(TEST_DIR, "foo");
|
private static final Path TEST_FILE_PATH = new Path(TEST_DIR, "foo");
|
||||||
|
private static final String TEST_FILE_STR = TEST_FILE_PATH.toUri().getPath();
|
||||||
private static final String TEST_FILE_DATA =
|
private static final String TEST_FILE_DATA =
|
||||||
"Hello state transitioning world";
|
"Hello state transitioning world";
|
||||||
|
|
||||||
@ -191,4 +196,59 @@ public void doAnAction() throws Exception {
|
|||||||
cluster.shutdown();
|
cluster.shutdown();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test for HDFS-2812. Since lease renewals go from the client
|
||||||
|
* only to the active NN, the SBN will have out-of-date lease
|
||||||
|
* info when it becomes active. We need to make sure we don't
|
||||||
|
* accidentally mark the leases as expired when the failover
|
||||||
|
* proceeds.
|
||||||
|
*/
|
||||||
|
@Test(timeout=120000)
|
||||||
|
public void testLeasesRenewedOnTransition() throws Exception {
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
|
||||||
|
.nnTopology(MiniDFSNNTopology.simpleHATopology())
|
||||||
|
.numDataNodes(1)
|
||||||
|
.build();
|
||||||
|
FSDataOutputStream stm = null;
|
||||||
|
FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
|
||||||
|
NameNode nn0 = cluster.getNameNode(0);
|
||||||
|
NameNode nn1 = cluster.getNameNode(1);
|
||||||
|
nn1.getNamesystem().getEditLogTailer().setSleepTime(250);
|
||||||
|
nn1.getNamesystem().getEditLogTailer().interrupt();
|
||||||
|
|
||||||
|
try {
|
||||||
|
cluster.waitActive();
|
||||||
|
cluster.transitionToActive(0);
|
||||||
|
|
||||||
|
LOG.info("Starting with NN 0 active");
|
||||||
|
|
||||||
|
stm = fs.create(TEST_FILE_PATH);
|
||||||
|
long nn0t0 = NameNodeAdapter.getLeaseRenewalTime(nn0, TEST_FILE_STR);
|
||||||
|
assertTrue(nn0t0 > 0);
|
||||||
|
long nn1t0 = NameNodeAdapter.getLeaseRenewalTime(nn1, TEST_FILE_STR);
|
||||||
|
assertEquals("Lease should not yet exist on nn1",
|
||||||
|
-1, nn1t0);
|
||||||
|
|
||||||
|
Thread.sleep(5); // make sure time advances!
|
||||||
|
|
||||||
|
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
|
||||||
|
long nn1t1 = NameNodeAdapter.getLeaseRenewalTime(nn1, TEST_FILE_STR);
|
||||||
|
assertTrue("Lease should have been created on standby. Time was: " +
|
||||||
|
nn1t1, nn1t1 > nn0t0);
|
||||||
|
|
||||||
|
Thread.sleep(5); // make sure time advances!
|
||||||
|
|
||||||
|
LOG.info("Failing over to NN 1");
|
||||||
|
cluster.transitionToStandby(0);
|
||||||
|
cluster.transitionToActive(1);
|
||||||
|
long nn1t2 = NameNodeAdapter.getLeaseRenewalTime(nn1, TEST_FILE_STR);
|
||||||
|
assertTrue("Lease should have been renewed by failover process",
|
||||||
|
nn1t2 > nn1t1);
|
||||||
|
} finally {
|
||||||
|
IOUtils.closeStream(stm);
|
||||||
|
cluster.shutdown();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user