HDFS-5579. Under construction files make DataNode decommission take very long hours. Contributed by zhaoyunjiong.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1557904 13f79535-47bb-0310-9956-ffa450edef68
Jing Zhao 2014-01-14 02:02:15 +00:00
parent e06ae2d567
commit e210519d32
5 changed files with 68 additions and 8 deletions
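The gist of the change, sketched below (a hedged sketch, not HDFS source; the stand-in types are trimmed to the two methods the predicate needs): before this patch the replication monitor skipped every block belonging to an under-construction file, so a block whose replicas all sat on decommissioning DataNodes was never re-replicated and decommission could hang for hours. After the patch, only the file's last block — the one still being written — is skipped.

// Hedged sketch of the changed predicate; standalone types are
// hypothetical stand-ins for the real HDFS classes.
interface Block {}
interface BlockCollection {
  boolean isUnderConstruction();
  Block getLastBlock();
}

class ReplicationSkipPredicate {
  // Before HDFS-5579: any block of an open (under-construction) file
  // was dropped from the needed-replications queue.
  static boolean skipOld(BlockCollection bc) {
    return bc == null || bc.isUnderConstruction();
  }

  // After HDFS-5579: only the file's last, still-being-written block is
  // skipped; completed blocks of the same open file are replicated
  // normally, so decommissioning nodes can drain.
  static boolean skipNew(BlockCollection bc, Block block) {
    return bc == null
        || (bc.isUnderConstruction() && block.equals(bc.getLastBlock()));
  }
}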

hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -749,6 +749,9 @@ Release 2.4.0 - UNRELEASED
     HDFS-5710. FSDirectory#getFullPathName should check inodes against null.
     (Uma Maheswara Rao G via jing9)
 
+    HDFS-5579. Under construction files make DataNode decommission take very long
+    hours. (zhaoyunjiong via jing9)
+
   BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS
 
     HDFS-4985. Add storage type to the protocol and expose it in block report
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockCollection.java

@@ -31,7 +31,7 @@ public interface BlockCollection {
   /**
    * Get the last block of the collection.
    */
-  public BlockInfo getLastBlock() throws IOException;
+  public BlockInfo getLastBlock();
 
   /**
    * Get content summary.

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java

@@ -1214,8 +1214,10 @@ int computeReplicationWorkForBlocks(List<List<Block>> blocksToReplicate) {
           // block should belong to a file
           bc = blocksMap.getBlockCollection(block);
           // abandoned block or block reopened for append
-          if(bc == null || bc.isUnderConstruction()) {
-            neededReplications.remove(block, priority); // remove from neededReplications
+          if (bc == null
+              || (bc.isUnderConstruction() && block.equals(bc.getLastBlock()))) {
+            // remove from neededReplications
+            neededReplications.remove(block, priority);
             continue;
           }
@@ -1295,7 +1297,7 @@ int computeReplicationWorkForBlocks(List<List<Block>> blocksToReplicate) {
           // block should belong to a file
           bc = blocksMap.getBlockCollection(block);
           // abandoned block or block reopened for append
-          if(bc == null || bc.isUnderConstruction()) {
+          if(bc == null || (bc.isUnderConstruction() && block.equals(bc.getLastBlock()))) {
             neededReplications.remove(block, priority); // remove from neededReplications
             rw.targets = null;
             continue;
@@ -2906,8 +2908,16 @@ boolean isReplicationInProgress(DatanodeDescriptor srcNode) {
         NumberReplicas num = countNodes(block);
         int curReplicas = num.liveReplicas();
         int curExpectedReplicas = getReplication(block);
+
         if (isNeededReplication(block, curExpectedReplicas, curReplicas)) {
           if (curExpectedReplicas > curReplicas) {
+            if (bc.isUnderConstruction()) {
+              if (block.equals(bc.getLastBlock()) && curReplicas > minReplication) {
+                continue;
+              }
+              underReplicatedInOpenFiles++;
+            }
+
             // Log info about one block for this node which needs replication
             if (!status) {
               status = true;
@@ -2924,9 +2934,6 @@ boolean isReplicationInProgress(DatanodeDescriptor srcNode) {
           if ((curReplicas == 0) && (num.decommissionedReplicas() > 0)) {
             decommissionOnlyReplicas++;
           }
-          if (bc.isUnderConstruction()) {
-            underReplicatedInOpenFiles++;
-          }
         }
         if (!neededReplications.contains(block) &&
             pendingReplications.getNumReplicas(block) == 0) {
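The two hunks above are the decommission-side half of the fix: isReplicationInProgress() decides whether a decommissioning DataNode still has blocks to drain. A hedged sketch of the new rule it encodes (minReplication corresponds to the dfs.namenode.replication.min setting; the helper itself is hypothetical, for illustration only):

// Hedged sketch, not HDFS source.
class DecommissionRule {
  // A decommissioning node is no longer pinned by the last block of an
  // open file once that block has more live replicas than the configured
  // minimum; the client's write pipeline is expected to finish it.
  // Otherwise the block still counts as under-replicated in open files.
  static boolean lastBlockCanBeIgnored(boolean underConstruction,
      boolean isLastBlock, int liveReplicas, int minReplication) {
    return underConstruction && isLastBlock && liveReplicas > minReplication;
  }
}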

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java

@@ -640,7 +640,7 @@ BlockInfo getPenultimateBlock() {
   }
 
   @Override
-  public BlockInfo getLastBlock() throws IOException {
+  public BlockInfo getLastBlock() {
     return blocks == null || blocks.length == 0? null: blocks[blocks.length-1];
   }
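This signature change is small but load-bearing: as the implementation shows, getLastBlock() never actually throws, and removing the checked exception lets the new BlockManager predicates above call it inline without a try/catch. A minimal sketch of the call pattern this enables (stand-in types, not HDFS source):

// Hedged sketch; nested stand-in types are hypothetical.
class LastBlockCheck {
  interface BlockInfo {}
  interface BlockCollection { BlockInfo getLastBlock(); }

  // With "throws IOException" on getLastBlock(), each of the new inline
  // call sites would have needed exception handling; without it, the
  // lookup composes directly into a boolean predicate.
  static boolean isLastBlock(BlockCollection bc, BlockInfo block) {
    BlockInfo last = bc.getLastBlock(); // may be null for an empty file
    return last != null && last.equals(block);
  }
}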

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java

@@ -42,6 +42,7 @@
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
+import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
@@ -779,4 +780,53 @@ public void testIncludeByRegistrationName() throws IOException,
       Thread.sleep(HEARTBEAT_INTERVAL * 1000);
     }
   }
+
+  @Test(timeout=120000)
+  public void testDecommissionWithOpenfile() throws IOException, InterruptedException {
+    LOG.info("Starting test testDecommissionWithOpenfile");
+
+    //At most 4 nodes will be decommissioned
+    startCluster(1, 7, conf);
+
+    FileSystem fileSys = cluster.getFileSystem(0);
+    FSNamesystem ns = cluster.getNamesystem(0);
+
+    String openFile = "/testDecommissionWithOpenfile.dat";
+
+    writeFile(fileSys, new Path(openFile), (short)3);
+
+    // make sure the file was open for write
+    FSDataOutputStream fdos = fileSys.append(new Path(openFile));
+
+    LocatedBlocks lbs = NameNodeAdapter.getBlockLocations(cluster.getNameNode(0), openFile, 0, fileSize);
+
+    DatanodeInfo[] dnInfos4LastBlock = lbs.getLastLocatedBlock().getLocations();
+    DatanodeInfo[] dnInfos4FirstBlock = lbs.get(0).getLocations();
+
+    ArrayList<String> nodes = new ArrayList<String>();
+    ArrayList<DatanodeInfo> dnInfos = new ArrayList<DatanodeInfo>();
+
+    for (DatanodeInfo datanodeInfo : dnInfos4FirstBlock) {
+      DatanodeInfo found = datanodeInfo;
+      for (DatanodeInfo dif : dnInfos4LastBlock) {
+        if (datanodeInfo.equals(dif)) {
+          found = null;
+        }
+      }
+      if (found != null) {
+        nodes.add(found.getXferAddr());
+        dnInfos.add(found);
+      }
+    }
+    //decommission one of the 3 nodes which have last block
+    nodes.add(dnInfos4LastBlock[0].getXferAddr());
+    dnInfos.add(dnInfos4LastBlock[0]);
+
+    writeConfigFile(excludeFile, nodes);
+    refreshNodes(ns, conf);
+    for (DatanodeInfo dn : dnInfos) {
+      waitNodeState(dn, AdminStates.DECOMMISSIONED);
+    }
+
+    fdos.close();
+  }
 }
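The nested loops in the test compute "DataNodes hosting the first block but not the last", then add one holder of the last block, so the decommission set exercises both the completed-block and open-last-block paths. A set-based formulation of that selection is equivalent and more compact; this is a sketch only, not part of the committed test, and it assumes the same equals()/hashCode() semantics on DatanodeInfo that the loops rely on:

// Hypothetical alternative to the nested loops above.
import java.util.*;

class NodePick {
  // Elements of "first" that do not also appear in "last",
  // via a HashSet membership test instead of an inner loop.
  static <T> List<T> inFirstButNotLast(T[] first, T[] last) {
    Set<T> lastSet = new HashSet<T>(Arrays.asList(last));
    List<T> result = new ArrayList<T>();
    for (T t : first) {
      if (!lastSet.contains(t)) {
        result.add(t);
      }
    }
    return result;
  }
}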