HDFS-10477. Stopping the decommission of a rack of DataNodes caused the NameNode to fail over to standby. Contributed by yunjiong zhao and Wei-Chiu Chuang.

This commit is contained in:
Wei-Chiu Chuang 2019-04-03 11:00:12 -07:00
parent 3b0c5016b2
commit be488b6070

View File

@ -4235,8 +4235,15 @@ void processExtraRedundancyBlocksOnInService(
if (!isPopulatingReplQueues()) { if (!isPopulatingReplQueues()) {
return; return;
} }
final Iterator<BlockInfo> it = srcNode.getBlockIterator();
int numExtraRedundancy = 0; int numExtraRedundancy = 0;
for (DatanodeStorageInfo datanodeStorageInfo : srcNode.getStorageInfos()) {
// the namesystem lock is released between iterations. Make sure the
// storage is not removed before continuing.
if (srcNode.getStorageInfo(datanodeStorageInfo.getStorageID()) == null) {
continue;
}
final Iterator<BlockInfo> it = datanodeStorageInfo.getBlockIterator();
while(it.hasNext()) { while(it.hasNext()) {
final BlockInfo block = it.next(); final BlockInfo block = it.next();
if (block.isDeleted()) { if (block.isDeleted()) {
@ -4252,6 +4259,19 @@ void processExtraRedundancyBlocksOnInService(
numExtraRedundancy++; numExtraRedundancy++;
} }
} }
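// The write lock is held only when this method is called from
// DatanodeManager.refreshNodes. When it is called by tests such as
// TestDefaultBlockPlacementPolicy.
// testPlacementWithLocalRackNodesDecommissioned, no lock protects it, so
// release and re-acquire the lock only if the current thread holds it.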
if (namesystem.hasWriteLock()) {
namesystem.writeUnlock();
try {
Thread.sleep(1);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
namesystem.writeLock();
}
}
LOG.info("Invalidated {} extra redundancy blocks on {} after " LOG.info("Invalidated {} extra redundancy blocks on {} after "
+ "it is in service", numExtraRedundancy, srcNode); + "it is in service", numExtraRedundancy, srcNode);
} }