HDFS-9549. TestCacheDirectives#testExceedsCapacity is flaky (Xiao Chen via cmccabe)
This commit is contained in:
parent
d9c409a428
commit
211c78c090
@ -2816,6 +2816,9 @@ Release 2.8.0 - UNRELEASED
|
||||
HDFS-9842. dfs.datanode.balance.bandwidthPerSec should accept friendly
|
||||
size units. (Lin Yiqun via Arpit Agarwal)
|
||||
|
||||
HDFS-9549. TestCacheDirectives#testExceedsCapacity is flaky (Xiao Chen via
|
||||
cmccabe)
|
||||
|
||||
Release 2.7.3 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
@ -27,6 +27,7 @@
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.Random;
|
||||
import java.util.TreeMap;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
@ -491,6 +492,26 @@ private String findReasonForNotCaching(CachedBlock cblock,
|
||||
* Blocks that are over-replicated should be removed from Datanodes.
|
||||
*/
|
||||
private void rescanCachedBlockMap() {
|
||||
// Remove pendingCached blocks that will make DN out-of-capacity.
|
||||
Set<DatanodeDescriptor> datanodes =
|
||||
blockManager.getDatanodeManager().getDatanodes();
|
||||
for (DatanodeDescriptor dn : datanodes) {
|
||||
long remaining = dn.getCacheRemaining();
|
||||
for (Iterator<CachedBlock> it = dn.getPendingCached().iterator();
|
||||
it.hasNext();) {
|
||||
CachedBlock cblock = it.next();
|
||||
BlockInfo blockInfo = blockManager.
|
||||
getStoredBlock(new Block(cblock.getBlockId()));
|
||||
if (blockInfo.getNumBytes() > remaining) {
|
||||
LOG.debug("Block {}: removing from PENDING_CACHED for node {} "
|
||||
+ "because it cannot fit in remaining cache size {}.",
|
||||
cblock.getBlockId(), dn.getDatanodeUuid(), remaining);
|
||||
it.remove();
|
||||
} else {
|
||||
remaining -= blockInfo.getNumBytes();
|
||||
}
|
||||
}
|
||||
}
|
||||
for (Iterator<CachedBlock> cbIter = cachedBlocks.iterator();
|
||||
cbIter.hasNext(); ) {
|
||||
scannedBlocks++;
|
||||
@ -687,7 +708,7 @@ private void addNewPendingCached(final int neededCached,
|
||||
if (pendingCapacity < blockInfo.getNumBytes()) {
|
||||
LOG.trace("Block {}: DataNode {} is not a valid possibility " +
|
||||
"because the block has size {}, but the DataNode only has {} " +
|
||||
"bytes of cache remaining ({} pending bytes, {} already cached.",
|
||||
"bytes of cache remaining ({} pending bytes, {} already cached.)",
|
||||
blockInfo.getBlockId(), datanode.getDatanodeUuid(),
|
||||
blockInfo.getNumBytes(), pendingCapacity, pendingBytes,
|
||||
datanode.getCacheRemaining());
|
||||
|
@ -413,6 +413,15 @@ public DatanodeDescriptor getDatanodeByXferAddr(String host, int xferPort) {
|
||||
return host2DatanodeMap.getDatanodeByXferAddr(host, xferPort);
|
||||
}
|
||||
|
||||
/** @return the datanode descriptors for all nodes. */
|
||||
public Set<DatanodeDescriptor> getDatanodes() {
|
||||
final Set<DatanodeDescriptor> datanodes;
|
||||
synchronized (this) {
|
||||
datanodes = new HashSet<>(datanodeMap.values());
|
||||
}
|
||||
return datanodes;
|
||||
}
|
||||
|
||||
/** @return the Host2NodesMap */
|
||||
public Host2NodesMap getHost2DatanodeMap() {
|
||||
return this.host2DatanodeMap;
|
||||
|
@ -72,7 +72,6 @@
|
||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
|
||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList.Type;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
|
||||
@ -86,9 +85,6 @@
|
||||
import org.apache.hadoop.security.UserGroupInformation;
|
||||
import org.apache.hadoop.test.GenericTestUtils;
|
||||
import org.apache.hadoop.util.GSet;
|
||||
import org.apache.log4j.Level;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
@ -1470,6 +1466,7 @@ public void testMaxRelativeExpiry() throws Exception {
|
||||
*/
|
||||
private void checkPendingCachedEmpty(MiniDFSCluster cluster)
|
||||
throws Exception {
|
||||
Thread.sleep(1000);
|
||||
cluster.getNamesystem().readLock();
|
||||
try {
|
||||
final DatanodeManager datanodeManager =
|
||||
@ -1501,7 +1498,6 @@ public void testExceedsCapacity() throws Exception {
|
||||
waitForCachedBlocks(namenode, -1, numCachedReplicas,
|
||||
"testExceeds:1");
|
||||
checkPendingCachedEmpty(cluster);
|
||||
Thread.sleep(1000);
|
||||
checkPendingCachedEmpty(cluster);
|
||||
|
||||
// Try creating a file with giant-sized blocks that exceed cache capacity
|
||||
@ -1509,7 +1505,6 @@ public void testExceedsCapacity() throws Exception {
|
||||
DFSTestUtil.createFile(dfs, fileName, 4096, fileLen, CACHE_CAPACITY * 2,
|
||||
(short) 1, 0xFADED);
|
||||
checkPendingCachedEmpty(cluster);
|
||||
Thread.sleep(1000);
|
||||
checkPendingCachedEmpty(cluster);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user