HDFS-5579. Under construction files make DataNode decommission take very long hours. Contributed by zhaoyunjiong.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1557904 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e06ae2d567
commit
e210519d32
@ -749,6 +749,9 @@ Release 2.4.0 - UNRELEASED
|
||||
HDFS-5710. FSDirectory#getFullPathName should check inodes against null.
|
||||
(Uma Maheswara Rao G via jing9)
|
||||
|
||||
HDFS-5579. Under construction files make DataNode decommission take very long
|
||||
hours. (zhaoyunjiong via jing9)
|
||||
|
||||
BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS
|
||||
|
||||
HDFS-4985. Add storage type to the protocol and expose it in block report
|
||||
|
@ -31,7 +31,7 @@ public interface BlockCollection {
|
||||
/**
|
||||
* Get the last block of the collection.
|
||||
*/
|
||||
public BlockInfo getLastBlock() throws IOException;
|
||||
public BlockInfo getLastBlock();
|
||||
|
||||
/**
|
||||
* Get content summary.
|
||||
|
@ -1214,8 +1214,10 @@ int computeReplicationWorkForBlocks(List<List<Block>> blocksToReplicate) {
|
||||
// block should belong to a file
|
||||
bc = blocksMap.getBlockCollection(block);
|
||||
// abandoned block or block reopened for append
|
||||
if(bc == null || bc.isUnderConstruction()) {
|
||||
neededReplications.remove(block, priority); // remove from neededReplications
|
||||
if (bc == null
|
||||
|| (bc.isUnderConstruction() && block.equals(bc.getLastBlock()))) {
|
||||
// remove from neededReplications
|
||||
neededReplications.remove(block, priority);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1295,7 +1297,7 @@ int computeReplicationWorkForBlocks(List<List<Block>> blocksToReplicate) {
|
||||
// block should belong to a file
|
||||
bc = blocksMap.getBlockCollection(block);
|
||||
// abandoned block or block reopened for append
|
||||
if(bc == null || bc.isUnderConstruction()) {
|
||||
if(bc == null || (bc.isUnderConstruction() && block.equals(bc.getLastBlock()))) {
|
||||
neededReplications.remove(block, priority); // remove from neededReplications
|
||||
rw.targets = null;
|
||||
continue;
|
||||
@ -2906,8 +2908,16 @@ boolean isReplicationInProgress(DatanodeDescriptor srcNode) {
|
||||
NumberReplicas num = countNodes(block);
|
||||
int curReplicas = num.liveReplicas();
|
||||
int curExpectedReplicas = getReplication(block);
|
||||
|
||||
if (isNeededReplication(block, curExpectedReplicas, curReplicas)) {
|
||||
if (curExpectedReplicas > curReplicas) {
|
||||
if (bc.isUnderConstruction()) {
|
||||
if (block.equals(bc.getLastBlock()) && curReplicas > minReplication) {
|
||||
continue;
|
||||
}
|
||||
underReplicatedInOpenFiles++;
|
||||
}
|
||||
|
||||
// Log info about one block for this node which needs replication
|
||||
if (!status) {
|
||||
status = true;
|
||||
@ -2924,9 +2934,6 @@ boolean isReplicationInProgress(DatanodeDescriptor srcNode) {
|
||||
if ((curReplicas == 0) && (num.decommissionedReplicas() > 0)) {
|
||||
decommissionOnlyReplicas++;
|
||||
}
|
||||
if (bc.isUnderConstruction()) {
|
||||
underReplicatedInOpenFiles++;
|
||||
}
|
||||
}
|
||||
if (!neededReplications.contains(block) &&
|
||||
pendingReplications.getNumReplicas(block) == 0) {
|
||||
|
@ -640,7 +640,7 @@ BlockInfo getPenultimateBlock() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public BlockInfo getLastBlock() throws IOException {
|
||||
public BlockInfo getLastBlock() {
|
||||
return blocks == null || blocks.length == 0? null: blocks[blocks.length-1];
|
||||
}
|
||||
|
||||
|
@ -42,6 +42,7 @@
|
||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates;
|
||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
|
||||
@ -779,4 +780,53 @@ public void testIncludeByRegistrationName() throws IOException,
|
||||
Thread.sleep(HEARTBEAT_INTERVAL * 1000);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Checks that DataNodes can reach the DECOMMISSIONED state while a file
 * whose blocks they hold is still open for write.
 *
 * The test writes a file, reopens it with append so that an output stream
 * stays open, and then excludes (a) every node that holds the first block
 * but not the last block and (b) one node that holds the last block. It
 * then waits for each of those nodes to report DECOMMISSIONED before the
 * stream is closed.
 *
 * @throws IOException declared by the test; propagated from the file system
 *         and cluster helpers it calls
 * @throws InterruptedException declared by the test; propagated from the
 *         waiting helpers it calls
 */
@Test(timeout=120000)
|
||||
public void testDecommissionWithOpenfile() throws IOException, InterruptedException {
|
||||
LOG.info("Starting test testDecommissionWithOpenfile");
|
||||
|
||||
// At most 4 nodes will be decommissioned (see the node selection below).
// NOTE(review): presumably this starts 1 NameNode and 7 DataNodes - confirm
// against startCluster.
startCluster(1, 7, conf);
|
||||
|
||||
FileSystem fileSys = cluster.getFileSystem(0);
|
||||
FSNamesystem ns = cluster.getNamesystem(0);
|
||||
|
||||
String openFile = "/testDecommissionWithOpenfile.dat";
|
||||
|
||||
// NOTE(review): (short)3 is presumably the replication factor - confirm
// against writeFile.
writeFile(fileSys, new Path(openFile), (short)3);
|
||||
// Reopen the file for append; the stream is only closed at the end of the
// test, after the decommission waits have completed.
FSDataOutputStream fdos = fileSys.append(new Path(openFile));
|
||||
|
||||
// Fetch the file's located blocks from the NameNode.
// NOTE(review): arguments 0 and fileSize are presumably offset and length.
LocatedBlocks lbs = NameNodeAdapter.getBlockLocations(cluster.getNameNode(0), openFile, 0, fileSize);
|
||||
|
||||
DatanodeInfo[] dnInfos4LastBlock = lbs.getLastLocatedBlock().getLocations();
|
||||
DatanodeInfo[] dnInfos4FirstBlock = lbs.get(0).getLocations();
|
||||
|
||||
// nodes: transfer addresses written to the exclude file.
// dnInfos: the same nodes, kept so their admin state can be awaited.
ArrayList<String> nodes = new ArrayList<String>();
|
||||
ArrayList<DatanodeInfo> dnInfos = new ArrayList<DatanodeInfo>();
|
||||
|
||||
// Select every node that holds the first block but not the last block.
for (DatanodeInfo datanodeInfo : dnInfos4FirstBlock) {
|
||||
DatanodeInfo found = datanodeInfo;
|
||||
for (DatanodeInfo dif: dnInfos4LastBlock) {
|
||||
if (datanodeInfo.equals(dif)) {
|
||||
// This node also holds the last block, so it is not selected here.
found = null;
|
||||
}
|
||||
}
|
||||
if (found != null) {
|
||||
nodes.add(found.getXferAddr());
|
||||
dnInfos.add(found);
|
||||
}
|
||||
}
|
||||
// Also decommission one of the 3 nodes which hold the last block.
nodes.add(dnInfos4LastBlock[0].getXferAddr());
|
||||
dnInfos.add(dnInfos4LastBlock[0]);
|
||||
|
||||
// Publish the exclude list and ask the namesystem to re-read it.
writeConfigFile(excludeFile, nodes);
|
||||
refreshNodes(ns, conf);
|
||||
// Every selected node must reach DECOMMISSIONED while the file is still open.
for (DatanodeInfo dn : dnInfos) {
|
||||
waitNodeState(dn, AdminStates.DECOMMISSIONED);
|
||||
}
|
||||
|
||||
// NOTE(review): fdos is not closed if any call above throws; consider a
// try/finally around the body.
fdos.close();
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user