HDFS-10220. A large number of expired leases can make namenode unresponsive and cause failover (Nicolas Fraison via raviprak)

Ravi Prakash 2016-06-08 13:44:22 -07:00
parent 0af96a1c08
commit ae047655f4
6 changed files with 94 additions and 22 deletions
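
In short, the change makes the LeaseManager release expired leases in small, time-bounded batches instead of looping over all of them while holding the FSNamesystem write lock, and it makes both the check interval and the per-batch lock-hold budget configurable. The following is a minimal, self-contained sketch of that pattern with purely illustrative names (BoundedLeaseMonitor and its fields are not the actual NameNode classes shown in the diffs below); it only demonstrates the idea of doing bounded work under the lock and then yielding.

import java.util.ArrayDeque;
import java.util.Queue;
import java.util.concurrent.locks.ReentrantLock;

class BoundedLeaseMonitor implements Runnable {
  private final long recheckIntervalMs;  // cf. dfs.namenode.lease-recheck-interval-ms
  private final long maxLockHoldMs;      // cf. dfs.namenode.max-lock-hold-to-release-lease-ms
  private final Queue<String> expiredLeases = new ArrayDeque<>();
  private final ReentrantLock namespaceLock = new ReentrantLock();

  BoundedLeaseMonitor(long recheckIntervalMs, long maxLockHoldMs) {
    this.recheckIntervalMs = recheckIntervalMs;
    this.maxLockHoldMs = maxLockHoldMs;
  }

  @Override
  public void run() {
    try {
      while (!Thread.currentThread().isInterrupted()) {
        namespaceLock.lock();
        try {
          long start = System.nanoTime();
          // Release leases only until the per-pass budget is spent; anything
          // left over stays queued and is retried on the next pass.
          while (!expiredLeases.isEmpty()
              && (System.nanoTime() - start) / 1_000_000L < maxLockHoldMs) {
            release(expiredLeases.poll());
          }
        } finally {
          namespaceLock.unlock();
        }
        Thread.sleep(recheckIntervalMs);  // other operations run while we sleep
      }
    } catch (InterruptedException ie) {
      Thread.currentThread().interrupt();
    }
  }

  private void release(String lease) { /* relinquish one expired lease */ }
}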

View File: DFSConfigKeys.java

@@ -397,6 +397,16 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final int DFS_NAMENODE_MAX_XATTR_SIZE_DEFAULT = 16384;
public static final int DFS_NAMENODE_MAX_XATTR_SIZE_HARD_LIMIT = 32768;
public static final String DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY =
"dfs.namenode.lease-recheck-interval-ms";
public static final long DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT =
2000;
public static final String
DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_KEY =
"dfs.namenode.max-lock-hold-to-release-lease-ms";
public static final long
DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT = 25;
public static final String DFS_UPGRADE_DOMAIN_FACTOR = "dfs.namenode.upgrade.domain.factor";
public static final int DFS_UPGRADE_DOMAIN_FACTOR_DEFAULT = DFS_REPLICATION_DEFAULT;
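
These constants are the new public knobs. As a usage sketch that is not part of this commit, a test or tool could override the defaults programmatically before starting a NameNode; the key constants come from the diff above, org.apache.hadoop.hdfs.HdfsConfiguration is Hadoop's standard HDFS configuration class, and the class name LeaseTuningExample and the chosen values are purely illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;

public class LeaseTuningExample {
  public static Configuration tunedConf() {
    Configuration conf = new HdfsConfiguration();
    // Check for expired leases every 500 ms instead of the 2000 ms default.
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY, 500L);
    // Let each release pass hold the namesystem write lock for at most 10 ms.
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_KEY, 10L);
    return conf;
  }
}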

View File: HdfsServerConstants.java

@@ -361,7 +361,6 @@ enum BlockUCState {
}
String NAMENODE_LEASE_HOLDER = "HDFS_NameNode";
long NAMENODE_LEASE_RECHECK_INTERVAL = 2000;
String CRYPTO_XATTR_ENCRYPTION_ZONE =
"raw.hdfs.crypto.encryption.zone";

View File: FSNamesystem.java

@@ -76,6 +76,10 @@
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RETRY_CACHE_HEAP_PERCENT_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT;
@@ -385,7 +389,12 @@ private void logAuditEvent(boolean succeeded,
private final UserGroupInformation fsOwner;
private final String supergroup;
private final boolean standbyShouldCheckpoint;
/** Interval between checks for leases to release. */
private final long leaseRecheckIntervalMs;
/** Maximum time the write lock is held to release leases. */
private final long maxLockHoldToReleaseLeaseMs;
// Scan interval is not configurable.
private static final long DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL =
TimeUnit.MILLISECONDS.convert(1, TimeUnit.HOURS);
@@ -803,6 +812,13 @@ static FSNamesystem loadFromDisk(Configuration conf) throws IOException {
DFSConfigKeys.DFS_NAMENODE_EDEKCACHELOADER_INTERVAL_MS_KEY,
DFSConfigKeys.DFS_NAMENODE_EDEKCACHELOADER_INTERVAL_MS_DEFAULT);
this.leaseRecheckIntervalMs = conf.getLong(
DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY,
DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT);
this.maxLockHoldToReleaseLeaseMs = conf.getLong(
DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_KEY,
DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT);
// For testing purposes, allow the DT secret manager to be started regardless
// of whether security is enabled.
alwaysUseDelegationTokensForTests = conf.getBoolean(
@@ -847,6 +863,16 @@ public RetryCache getRetryCache() {
return retryCache;
}
@VisibleForTesting
public long getLeaseRecheckIntervalMs() {
return leaseRecheckIntervalMs;
}
@VisibleForTesting
public long getMaxLockHoldToReleaseLeaseMs() {
return maxLockHoldToReleaseLeaseMs;
}
void lockRetryCache() {
if (retryCache != null) {
retryCache.lock();
@@ -3116,9 +3142,9 @@ boolean internalReleaseLease(Lease lease, String src, INodesInPath iip,
if(nrCompleteBlocks == nrBlocks) {
finalizeINodeFileUnderConstruction(src, pendingFile,
iip.getLatestSnapshotId(), false);
NameNode.stateChangeLog.warn("BLOCK*"
+ " internalReleaseLease: All existing blocks are COMPLETE,"
+ " lease removed, file closed.");
NameNode.stateChangeLog.warn("BLOCK*" +
" internalReleaseLease: All existing blocks are COMPLETE," +
" lease removed, file " + src + " closed.");
return true; // closed!
}
@@ -3155,9 +3181,9 @@ boolean internalReleaseLease(Lease lease, String src, INodesInPath iip,
blockManager.hasMinStorage(lastBlock)) {
finalizeINodeFileUnderConstruction(src, pendingFile,
iip.getLatestSnapshotId(), false);
NameNode.stateChangeLog.warn("BLOCK*"
+ " internalReleaseLease: Committed blocks are minimally replicated,"
+ " lease removed, file closed.");
NameNode.stateChangeLog.warn("BLOCK*" +
" internalReleaseLease: Committed blocks are minimally" +
" replicated, lease removed, file" + src + " closed.");
return true; // closed!
}
// Cannot close file right now, since some blocks
@@ -3200,7 +3226,7 @@ boolean internalReleaseLease(Lease lease, String src, INodesInPath iip,
finalizeINodeFileUnderConstruction(src, pendingFile,
iip.getLatestSnapshotId(), false);
NameNode.stateChangeLog.warn("BLOCK* internalReleaseLease: "
+ "Removed empty last block and closed file.");
+ "Removed empty last block and closed file " + src);
return true;
}
// start recovery of the last block for this file

View File: LeaseManager.java

@@ -336,7 +336,7 @@ public void run() {
}
}
Thread.sleep(HdfsServerConstants.NAMENODE_LEASE_RECHECK_INTERVAL);
Thread.sleep(fsnamesystem.getLeaseRecheckIntervalMs());
} catch(InterruptedException ie) {
if (LOG.isDebugEnabled()) {
LOG.debug(name + " is interrupted", ie);
@@ -356,8 +356,11 @@ synchronized boolean checkLeases() {
boolean needSync = false;
assert fsnamesystem.hasWriteLock();
while(!sortedLeases.isEmpty() && sortedLeases.peek().expiredHardLimit()) {
Lease leaseToCheck = sortedLeases.poll();
long start = monotonicNow();
while(!sortedLeases.isEmpty() && sortedLeases.peek().expiredHardLimit()
&& !isMaxLockHoldToReleaseLease(start)) {
Lease leaseToCheck = sortedLeases.peek();
LOG.info(leaseToCheck + " has expired hard limit");
final List<Long> removing = new ArrayList<>();
@@ -397,6 +400,11 @@ synchronized boolean checkLeases() {
+ leaseToCheck, e);
removing.add(id);
}
if (isMaxLockHoldToReleaseLease(start)) {
LOG.debug("Breaking out of checkLeases after " +
fsnamesystem.getMaxLockHoldToReleaseLeaseMs() + "ms.");
break;
}
}
for(Long id : removing) {
@@ -407,6 +415,13 @@ synchronized boolean checkLeases() {
return needSync;
}
/** @return true if max lock hold is reached */
private boolean isMaxLockHoldToReleaseLease(long start) {
return monotonicNow() - start >
fsnamesystem.getMaxLockHoldToReleaseLeaseMs();
}
@Override
public synchronized String toString() {
return getClass().getSimpleName() + "= {"
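
One detail worth calling out in the LeaseManager change above: checkLeases() now peeks at the head of sortedLeases instead of polling it, so a lease that is not fully processed before the lock-hold budget expires stays queued and is simply retried after the Monitor's next sleep of getLeaseRecheckIntervalMs(). Combined with the per-file budget check that breaks out of the inner loop, this spreads lease release across many short write-lock holds instead of one unbounded one; no expired lease is dropped, its release is only deferred to a later cycle.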

View File: hdfs-default.xml

@@ -2589,6 +2589,24 @@
</description>
</property>
<property>
<name>dfs.namenode.lease-recheck-interval-ms</name>
<value>2000</value>
<description>The interval in milliseconds at which the NameNode checks for
expired leases to release. Lease release holds the namesystem write lock,
so this controls how often that work is scheduled, not how long it may run.
</description>
</property>
<property>
<name>dfs.namenode.max-lock-hold-to-release-lease-ms</name>
<value>25</value>
<description>While leases are being released, a lock is held that stalls any
other operation on the namenode. To avoid blocking them for too long, lease
release stops after holding the lock for this many milliseconds and resumes
on the next check.
</description>
</property>
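
Taken together, the two defaults above bound how much of the time lease release can hold the namesystem write lock: at most one pass of about 25 ms per 2000 ms recheck cycle, roughly 25 / (2000 + 25), or about 1.2 percent in the worst case (slightly more in practice, because the budget is only checked between leases and files, so an in-flight release is allowed to finish). Operators who find lease cleanup falling behind can raise dfs.namenode.max-lock-hold-to-release-lease-ms or lower dfs.namenode.lease-recheck-interval-ms, trading some namenode responsiveness for faster lease release.
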
<property>
<name>dfs.namenode.startup.delay.block.deletion.sec</name>
<value>0</value>

View File: TestLeaseManager.java

@@ -19,6 +19,7 @@
import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
@@ -39,6 +40,8 @@ public class TestLeaseManager {
@Rule
public Timeout timeout = new Timeout(300000);
public static long maxLockHoldToReleaseLeaseMs = 100;
@Test
public void testRemoveLeases() throws Exception {
FSNamesystem fsn = mock(FSNamesystem.class);
@@ -57,28 +60,28 @@ public void testRemoveLeases() throws Exception {
assertEquals(0, lm.getINodeIdWithLeases().size());
}
/** Check that even if LeaseManager.checkLeases() is not able to relinquish
* leases, the Namenode doesn't enter an infinite loop while holding the FSN
* write lock and thus become unresponsive
/** Check that LeaseManager.checkLeases() releases some leases
*/
@Test
public void testCheckLeaseNotInfiniteLoop() {
public void testCheckLease() {
LeaseManager lm = new LeaseManager(makeMockFsNameSystem());
long numLease = 100;
//Make sure the leases we are going to add exceed the hard limit
lm.setLeasePeriod(0, 0);
//Add some leases to the LeaseManager
lm.addLease("holder1", INodeId.ROOT_INODE_ID + 1);
lm.addLease("holder2", INodeId.ROOT_INODE_ID + 2);
lm.addLease("holder3", INodeId.ROOT_INODE_ID + 3);
assertEquals(lm.countLease(), 3);
for (long i = 0; i <= numLease - 1; i++) {
//Add some leases to the LeaseManager
lm.addLease("holder"+i, INodeId.ROOT_INODE_ID + i);
}
assertEquals(numLease, lm.countLease());
//Initiate a call to checkLeases. This should exit within the test timeout
lm.checkLeases();
assertTrue(lm.countLease() < numLease);
}
@Test
public void testCountPath() {
LeaseManager lm = new LeaseManager(makeMockFsNameSystem());
@@ -112,6 +115,7 @@ private static FSNamesystem makeMockFsNameSystem() {
when(fsn.isRunning()).thenReturn(true);
when(fsn.hasWriteLock()).thenReturn(true);
when(fsn.getFSDirectory()).thenReturn(dir);
when(fsn.getMaxLockHoldToReleaseLeaseMs()).thenReturn(maxLockHoldToReleaseLeaseMs);
return fsn;
}