HDFS-4882. Prevent the Namenode's LeaseManager from looping forever in checkLeases (Ravi Prakash via Colin P. McCabe)
This commit is contained in:
parent
555fa2d9d0
commit
daacbc18d7
@ -6226,26 +6226,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
|
|||||||
// Calculate number of blocks under construction
|
// Calculate number of blocks under construction
|
||||||
long numUCBlocks = 0;
|
long numUCBlocks = 0;
|
||||||
readLock();
|
readLock();
|
||||||
|
numUCBlocks = leaseManager.getNumUnderConstructionBlocks();
|
||||||
try {
|
try {
|
||||||
for (Lease lease : leaseManager.getSortedLeases()) {
|
|
||||||
for (String path : lease.getPaths()) {
|
|
||||||
final INodeFile cons;
|
|
||||||
try {
|
|
||||||
cons = dir.getINode(path).asFile();
|
|
||||||
Preconditions.checkState(cons.isUnderConstruction());
|
|
||||||
} catch (UnresolvedLinkException e) {
|
|
||||||
throw new AssertionError("Lease files should reside on this FS");
|
|
||||||
}
|
|
||||||
BlockInfo[] blocks = cons.getBlocks();
|
|
||||||
if(blocks == null)
|
|
||||||
continue;
|
|
||||||
for(BlockInfo b : blocks) {
|
|
||||||
if(!b.isComplete())
|
|
||||||
numUCBlocks++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
LOG.info("Number of blocks under construction: " + numUCBlocks);
|
|
||||||
return getBlocksTotal() - numUCBlocks;
|
return getBlocksTotal() - numUCBlocks;
|
||||||
} finally {
|
} finally {
|
||||||
readUnlock();
|
readUnlock();
|
||||||
|
@ -25,8 +25,9 @@ import java.util.Collection;
|
|||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.NavigableSet;
|
||||||
|
import java.util.NoSuchElementException;
|
||||||
import java.util.SortedMap;
|
import java.util.SortedMap;
|
||||||
import java.util.SortedSet;
|
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
|
|
||||||
@ -36,6 +37,7 @@ import org.apache.hadoop.classification.InterfaceAudience;
|
|||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.fs.UnresolvedLinkException;
|
import org.apache.hadoop.fs.UnresolvedLinkException;
|
||||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
||||||
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
|
||||||
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
|
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
|
||||||
import org.apache.hadoop.util.Daemon;
|
import org.apache.hadoop.util.Daemon;
|
||||||
|
|
||||||
@ -79,7 +81,7 @@ public class LeaseManager {
|
|||||||
//
|
//
|
||||||
private final SortedMap<String, Lease> leases = new TreeMap<String, Lease>();
|
private final SortedMap<String, Lease> leases = new TreeMap<String, Lease>();
|
||||||
// Set of: Lease
|
// Set of: Lease
|
||||||
private final SortedSet<Lease> sortedLeases = new TreeSet<Lease>();
|
private final NavigableSet<Lease> sortedLeases = new TreeSet<Lease>();
|
||||||
|
|
||||||
//
|
//
|
||||||
// Map path names to leases. It is protected by the sortedLeases lock.
|
// Map path names to leases. It is protected by the sortedLeases lock.
|
||||||
@ -96,7 +98,40 @@ public class LeaseManager {
|
|||||||
return leases.get(holder);
|
return leases.get(holder);
|
||||||
}
|
}
|
||||||
|
|
||||||
SortedSet<Lease> getSortedLeases() {return sortedLeases;}
|
@VisibleForTesting
|
||||||
|
int getNumSortedLeases() {return sortedLeases.size();}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method iterates through all the leases and counts the number of blocks
|
||||||
|
* which are not COMPLETE. The FSNamesystem read lock MUST be held before
|
||||||
|
* calling this method.
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
synchronized long getNumUnderConstructionBlocks() {
|
||||||
|
assert this.fsnamesystem.hasReadLock() : "The FSNamesystem read lock wasn't"
|
||||||
|
+ "acquired before counting under construction blocks";
|
||||||
|
long numUCBlocks = 0;
|
||||||
|
for (Lease lease : sortedLeases) {
|
||||||
|
for (String path : lease.getPaths()) {
|
||||||
|
final INodeFile cons;
|
||||||
|
try {
|
||||||
|
cons = this.fsnamesystem.getFSDirectory().getINode(path).asFile();
|
||||||
|
Preconditions.checkState(cons.isUnderConstruction());
|
||||||
|
} catch (UnresolvedLinkException e) {
|
||||||
|
throw new AssertionError("Lease files should reside on this FS");
|
||||||
|
}
|
||||||
|
BlockInfo[] blocks = cons.getBlocks();
|
||||||
|
if(blocks == null)
|
||||||
|
continue;
|
||||||
|
for(BlockInfo b : blocks) {
|
||||||
|
if(!b.isComplete())
|
||||||
|
numUCBlocks++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LOG.info("Number of blocks under construction: " + numUCBlocks);
|
||||||
|
return numUCBlocks;
|
||||||
|
}
|
||||||
|
|
||||||
/** @return the lease containing src */
|
/** @return the lease containing src */
|
||||||
public Lease getLeaseByPath(String src) {return sortedLeasesByPath.get(src);}
|
public Lease getLeaseByPath(String src) {return sortedLeasesByPath.get(src);}
|
||||||
@ -421,33 +456,38 @@ public class LeaseManager {
|
|||||||
/** Check the leases beginning from the oldest.
|
/** Check the leases beginning from the oldest.
|
||||||
* @return true is sync is needed.
|
* @return true is sync is needed.
|
||||||
*/
|
*/
|
||||||
private synchronized boolean checkLeases() {
|
@VisibleForTesting
|
||||||
|
synchronized boolean checkLeases() {
|
||||||
boolean needSync = false;
|
boolean needSync = false;
|
||||||
assert fsnamesystem.hasWriteLock();
|
assert fsnamesystem.hasWriteLock();
|
||||||
for(; sortedLeases.size() > 0; ) {
|
Lease leaseToCheck = null;
|
||||||
final Lease oldest = sortedLeases.first();
|
try {
|
||||||
if (!oldest.expiredHardLimit()) {
|
leaseToCheck = sortedLeases.first();
|
||||||
return needSync;
|
} catch(NoSuchElementException e) {}
|
||||||
|
|
||||||
|
while(leaseToCheck != null) {
|
||||||
|
if (!leaseToCheck.expiredHardLimit()) {
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.info(oldest + " has expired hard limit");
|
LOG.info(leaseToCheck + " has expired hard limit");
|
||||||
|
|
||||||
final List<String> removing = new ArrayList<String>();
|
final List<String> removing = new ArrayList<String>();
|
||||||
// need to create a copy of the oldest lease paths, becuase
|
// need to create a copy of the oldest lease paths, because
|
||||||
// internalReleaseLease() removes paths corresponding to empty files,
|
// internalReleaseLease() removes paths corresponding to empty files,
|
||||||
// i.e. it needs to modify the collection being iterated over
|
// i.e. it needs to modify the collection being iterated over
|
||||||
// causing ConcurrentModificationException
|
// causing ConcurrentModificationException
|
||||||
String[] leasePaths = new String[oldest.getPaths().size()];
|
String[] leasePaths = new String[leaseToCheck.getPaths().size()];
|
||||||
oldest.getPaths().toArray(leasePaths);
|
leaseToCheck.getPaths().toArray(leasePaths);
|
||||||
for(String p : leasePaths) {
|
for(String p : leasePaths) {
|
||||||
try {
|
try {
|
||||||
boolean completed = fsnamesystem.internalReleaseLease(oldest, p,
|
boolean completed = fsnamesystem.internalReleaseLease(leaseToCheck, p,
|
||||||
HdfsServerConstants.NAMENODE_LEASE_HOLDER);
|
HdfsServerConstants.NAMENODE_LEASE_HOLDER);
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
if (completed) {
|
if (completed) {
|
||||||
LOG.debug("Lease recovery for " + p + " is complete. File closed.");
|
LOG.debug("Lease recovery for " + p + " is complete. File closed.");
|
||||||
} else {
|
} else {
|
||||||
LOG.debug("Started block recovery " + p + " lease " + oldest);
|
LOG.debug("Started block recovery " + p + " lease " + leaseToCheck);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// If a lease recovery happened, we need to sync later.
|
// If a lease recovery happened, we need to sync later.
|
||||||
@ -456,15 +496,23 @@ public class LeaseManager {
|
|||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOG.error("Cannot release the path " + p + " in the lease "
|
LOG.error("Cannot release the path " + p + " in the lease "
|
||||||
+ oldest, e);
|
+ leaseToCheck, e);
|
||||||
removing.add(p);
|
removing.add(p);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for(String p : removing) {
|
for(String p : removing) {
|
||||||
removeLease(oldest, p);
|
removeLease(leaseToCheck, p);
|
||||||
}
|
}
|
||||||
|
leaseToCheck = sortedLeases.higher(leaseToCheck);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
if(leaseToCheck != sortedLeases.first()) {
|
||||||
|
LOG.warn("Unable to release hard-limit expired lease: "
|
||||||
|
+ sortedLeases.first());
|
||||||
|
}
|
||||||
|
} catch(NoSuchElementException e) {}
|
||||||
return needSync;
|
return needSync;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdfs.server.namenode;
|
package org.apache.hadoop.hdfs.server.namenode;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertNotNull;
|
import static org.junit.Assert.assertNotNull;
|
||||||
import static org.junit.Assert.assertNull;
|
import static org.junit.Assert.assertNull;
|
||||||
|
|
||||||
@ -54,4 +55,28 @@ public class TestLeaseManager {
|
|||||||
assertNull(lm.getLeaseByPath("/a/b"));
|
assertNull(lm.getLeaseByPath("/a/b"));
|
||||||
assertNull(lm.getLeaseByPath("/a/c"));
|
assertNull(lm.getLeaseByPath("/a/c"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Check that even if LeaseManager.checkLease is not able to relinquish
|
||||||
|
* leases, the Namenode does't enter an infinite loop while holding the FSN
|
||||||
|
* write lock and thus become unresponsive
|
||||||
|
*/
|
||||||
|
@Test (timeout=1000)
|
||||||
|
public void testCheckLeaseNotInfiniteLoop() {
|
||||||
|
FSNamesystem fsn = Mockito.mock(FSNamesystem.class);
|
||||||
|
Mockito.when(fsn.isRunning()).thenReturn(true);
|
||||||
|
Mockito.when(fsn.hasWriteLock()).thenReturn(true);
|
||||||
|
LeaseManager lm = new LeaseManager(fsn);
|
||||||
|
|
||||||
|
//Make sure the leases we are going to add exceed the hard limit
|
||||||
|
lm.setLeasePeriod(0,0);
|
||||||
|
|
||||||
|
//Add some leases to the LeaseManager
|
||||||
|
lm.addLease("holder1", "src1");
|
||||||
|
lm.addLease("holder2", "src2");
|
||||||
|
lm.addLease("holder3", "src3");
|
||||||
|
assertEquals(lm.getNumSortedLeases(), 3);
|
||||||
|
|
||||||
|
//Initiate a call to checkLease. This should exit within the test timeout
|
||||||
|
lm.checkLeases();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user