diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 2afe1f6d8a..e6eac4ce46 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -11,6 +11,9 @@ HDFS-6584: Archival Storage
     HDFS-6671. Change BlockPlacementPolicy to consider block storage policy
     in replicaiton. (szetszwo)
 
+    HDFS-6710. Change BlockPlacementPolicy to consider block storage policy
+    in replica deletion. (szetszwo)
+
 Trunk (Unreleased)
 
   INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockStoragePolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockStoragePolicy.java
index c1ed64b5c7..ffcfbdb89c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockStoragePolicy.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockStoragePolicy.java
@@ -118,15 +118,40 @@ public List<StorageType> chooseStorageTypes(final short replication) {
   public List<StorageType> chooseStorageTypes(final short replication,
       final Iterable<StorageType> chosen) {
     final List<StorageType> types = chooseStorageTypes(replication);
+    diff(types, chosen, null);
+    return types;
+  }
 
-    //remove the chosen storage types
-    for(StorageType c : chosen) {
-      final int i = types.indexOf(c);
+  /**
+   * Compute the list difference t = t - c.
+   * Further, if e is not null, set e = e + c - t;
+   */
+  private static void diff(List<StorageType> t, Iterable<StorageType> c,
+      List<StorageType> e) {
+    for(StorageType storagetype : c) {
+      final int i = t.indexOf(storagetype);
       if (i >= 0) {
-        types.remove(i);
+        t.remove(i);
+      } else if (e != null) {
+        e.add(storagetype);
       }
     }
-    return types;
+  }
+
+  /**
+   * Choose excess storage types for deletion, given the
+   * replication number and the storage types of the chosen replicas.
+   *
+   * @param replication the replication number.
+   * @param chosen the storage types of the chosen replicas.
+   * @return a list of {@link StorageType}s for deletion.
+   */
+  public List<StorageType> chooseExcess(final short replication,
+      final Iterable<StorageType> chosen) {
+    final List<StorageType> types = chooseStorageTypes(replication);
+    final List<StorageType> excess = new LinkedList<StorageType>();
+    diff(types, chosen, excess);
+    return excess;
   }
 
   /** @return the fallback {@link StorageType} for creation. */
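
The new chooseExcess() computes, in effect, "chosen minus expected": each replica's storage type is matched off against the types the policy still wants, and whatever cannot be matched is reported as excess and becomes a candidate for deletion. Below is a minimal standalone sketch of that list difference, using plain strings in place of StorageType so it runs without Hadoop on the classpath; the class and variable names are illustrative only, not part of the patch.

    import java.util.Arrays;
    import java.util.LinkedList;
    import java.util.List;

    public class ListDiffSketch {
      // Mirrors the patch's diff(t, c, e): t = t - c, and every element of c
      // that has no match left in t is collected into e.
      static void diff(List<String> t, Iterable<String> c, List<String> e) {
        for (String type : c) {
          final int i = t.indexOf(type);
          if (i >= 0) {
            t.remove(i);      // a wanted type is satisfied by a chosen replica
          } else if (e != null) {
            e.add(type);      // chosen type not wanted by the policy: excess
          }
        }
      }

      public static void main(String[] args) {
        // Say the policy wants [DISK, DISK, ARCHIVE] for replication 3, but
        // the block's replicas actually sit on [DISK, ARCHIVE, ARCHIVE].
        List<String> wanted = new LinkedList<>(Arrays.asList("DISK", "DISK", "ARCHIVE"));
        List<String> excess = new LinkedList<>();
        diff(wanted, Arrays.asList("DISK", "ARCHIVE", "ARCHIVE"), excess);
        System.out.println(excess);  // [ARCHIVE] -> delete one ARCHIVE replica
      }
    }
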
@@ -264,5 +289,5 @@ private static void throwIllegalArgumentException(String message,
     throw new IllegalArgumentException(message + " in "
         + DFS_BLOCK_STORAGE_POLICIES_KEY + " \""
         + conf.get(DFS_BLOCK_STORAGE_POLICIES_KEY) + "\".");
-  }  
+  }
 }
\ No newline at end of file
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index dc322b2859..e5536a5795 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -46,6 +46,7 @@
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.HAUtil;
+import org.apache.hadoop.hdfs.StorageType;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
 import org.apache.hadoop.hdfs.protocol.BlockListAsLongs.BlockReportIterator;
@@ -2712,6 +2713,10 @@ private void chooseExcessReplicates(final Collection<DatanodeStorageInfo> nonExc
     assert namesystem.hasWriteLock();
     // first form a rack to datanodes map and
     BlockCollection bc = getBlockCollection(b);
+    final BlockStoragePolicy storagePolicy = storagePolicySuite.getPolicy(bc.getStoragePolicyID());
+    final List<StorageType> excessTypes = storagePolicy.chooseExcess(
+        replication, DatanodeStorageInfo.toStorageTypes(nonExcess));
+
     final Map<String, List<DatanodeStorageInfo>> rackMap
         = new HashMap<String, List<DatanodeStorageInfo>>();
 
@@ -2741,7 +2746,7 @@ private void chooseExcessReplicates(final Collection<DatanodeStorageInfo> nonExc
           cur = delNodeHintStorage;
         } else { // regular excessive replica removal
           cur = replicator.chooseReplicaToDelete(bc, b, replication,
-              moreThanOne, exactlyOne);
+              moreThanOne, exactlyOne, excessTypes);
         }
         firstOne = false;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicy.java
index 8c6f57c199..00f7253275 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicy.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicy.java
@@ -119,18 +119,21 @@ abstract public BlockPlacementStatus verifyBlockPlacement(String srcPath,
    * @param srcBC block collection of file to which block-to-be-deleted belongs
    * @param block The block to be deleted
    * @param replicationFactor The required number of replicas for this block
-   * @param existingReplicas The replica locations of this block that are present
-   *          on at least two unique racks.
-   * @param moreExistingReplicas Replica locations of this block that are not
-   *          listed in the previous parameter.
+   * @param moreThanOne The replica locations of this block that are present
+   *          on more than one unique rack.
+   * @param exactlyOne Replica locations of this block that are present
+   *          on exactly one unique rack.
+   * @param excessTypes The excess {@link StorageType}s according to the
+   *          {@link BlockStoragePolicy}.
    * @return the replica that is the best candidate for deletion
    */
   abstract public DatanodeStorageInfo chooseReplicaToDelete(
       BlockCollection srcBC,
       Block block,
       short replicationFactor,
-      Collection<DatanodeStorageInfo> existingReplicas,
-      Collection<DatanodeStorageInfo> moreExistingReplicas);
+      Collection<DatanodeStorageInfo> moreThanOne,
+      Collection<DatanodeStorageInfo> exactlyOne,
+      List<StorageType> excessTypes);
 
   /**
    * Used to setup a BlockPlacementPolicy object. This should be defined by
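
Since the abstract signature changed, every BlockPlacementPolicy implementation must now accept excessTypes and honor it: only a replica whose storage type appears in the list should be returned, and the returned replica's type should be consumed from the list so later calls see the remaining budget. A hypothetical minimal subclass, not part of this patch and shown only to illustrate the contract:

    import java.util.Collection;
    import java.util.List;

    import org.apache.hadoop.hdfs.StorageType;
    import org.apache.hadoop.hdfs.protocol.Block;
    import org.apache.hadoop.hdfs.server.blockmanagement.*;

    // Hypothetical policy illustrating the new deletion contract.
    public class FirstFitDeleterPolicy extends BlockPlacementPolicyDefault {
      @Override
      public DatanodeStorageInfo chooseReplicaToDelete(BlockCollection bc,
          Block block, short replicationFactor,
          Collection<DatanodeStorageInfo> moreThanOne,
          Collection<DatanodeStorageInfo> exactlyOne,
          List<StorageType> excessTypes) {
        // Prefer shrinking the set that keeps rack redundancy after a deletion.
        final Collection<DatanodeStorageInfo> candidates =
            moreThanOne.isEmpty() ? exactlyOne : moreThanOne;
        for (DatanodeStorageInfo storage : candidates) {
          if (excessTypes.contains(storage.getStorageType())) {
            // Consume the matching entry so the next call respects what is left.
            excessTypes.remove(storage.getStorageType());
            return storage;
          }
        }
        return null; // no replica matches an excess storage type
      }
    }

RandomDeleterPolicy in TestDNFencing, updated at the end of this patch, follows the same shape.
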
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
index 80e40cde89..6345a2c7c4 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
@@ -22,7 +22,6 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashSet;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
 import java.util.TreeSet;
@@ -273,8 +272,8 @@ private Node chooseTarget(int numOfReplicas,
     // Keep a copy of original excludedNodes
     final Set<Node> oldExcludedNodes = avoidStaleNodes ?
         new HashSet<Node>(excludedNodes) : null;
-    final List<StorageType> storageTypes = chooseStorageTypes(storagePolicy,
-        (short)totalReplicasExpected, results);
+    final List<StorageType> storageTypes = storagePolicy.chooseStorageTypes(
+        (short)totalReplicasExpected, DatanodeStorageInfo.toStorageTypes(results));
     try {
       if (numOfResults == 0) {
         writer = chooseLocalStorage(writer, excludedNodes, blocksize,
@@ -671,28 +670,6 @@ private boolean isGoodTarget(DatanodeStorageInfo storage,
     }
     return true;
   }
-
-  private static List<StorageType> chooseStorageTypes(
-      final BlockStoragePolicy storagePolicy, final short replication,
-      final Iterable<DatanodeStorageInfo> chosen) {
-    return storagePolicy.chooseStorageTypes(
-        replication, new Iterable<StorageType>() {
-          @Override
-          public Iterator<StorageType> iterator() {
-            return new Iterator<StorageType>() {
-              final Iterator<DatanodeStorageInfo> i = chosen.iterator();
-              @Override
-              public boolean hasNext() {return i.hasNext();}
-              @Override
-              public StorageType next() {return i.next().getStorageType();}
-              @Override
-              public void remove() {
-                throw new UnsupportedOperationException();
-              }
-            };
-          }
-        });
-  }
 
   /**
    * Return a pipeline of nodes.
@@ -759,7 +736,8 @@ public BlockPlacementStatus verifyBlockPlacement(String srcPath,
   public DatanodeStorageInfo chooseReplicaToDelete(BlockCollection bc,
       Block block, short replicationFactor,
       Collection<DatanodeStorageInfo> first,
-      Collection<DatanodeStorageInfo> second) {
+      Collection<DatanodeStorageInfo> second,
+      final List<StorageType> excessTypes) {
     long oldestHeartbeat =
       now() - heartbeatInterval * tolerateHeartbeatMultiplier;
     DatanodeStorageInfo oldestHeartbeatStorage = null;
@@ -769,6 +747,10 @@ public DatanodeStorageInfo chooseReplicaToDelete(BlockCollection bc,
     // Pick the node with the oldest heartbeat or with the least free space,
     // if all hearbeats are within the tolerable heartbeat interval
     for(DatanodeStorageInfo storage : pickupReplicaSet(first, second)) {
+      if (!excessTypes.contains(storage.getStorageType())) {
+        continue;
+      }
+
       final DatanodeDescriptor node = storage.getDatanodeDescriptor();
       long free = node.getRemaining();
       long lastHeartbeat = node.getLastUpdate();
@@ -781,9 +763,10 @@ public DatanodeStorageInfo chooseReplicaToDelete(BlockCollection bc,
         minSpaceStorage = storage;
       }
     }
-
-    return oldestHeartbeatStorage != null? oldestHeartbeatStorage
-        : minSpaceStorage;
+    final DatanodeStorageInfo storage = oldestHeartbeatStorage != null?
+        oldestHeartbeatStorage : minSpaceStorage;
+    excessTypes.remove(storage.getStorageType());
+    return storage;
   }
 
   /**
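
In the default policy, excessTypes now acts both as a filter and as a budget: replicas whose storage type is not excess are skipped before the oldest-heartbeat/least-free-space comparison, and the winner's type is removed before returning, so repeated calls against the same list can never delete more replicas of a given type than the storage policy allows. A worked example with hypothetical values, assuming policy is a BlockStoragePolicy whose chooseStorageTypes((short) 3) returns [DISK, DISK, DISK]:

    // The block has two DISK and two ARCHIVE replicas, but the policy wants
    // three DISK replicas.
    List<StorageType> excess = policy.chooseExcess((short) 3,
        Arrays.asList(StorageType.DISK, StorageType.DISK,
            StorageType.ARCHIVE, StorageType.ARCHIVE));
    // excess == [ARCHIVE, ARCHIVE]: only ARCHIVE replicas are deletion
    // candidates; DISK replicas are skipped even with the oldest heartbeat.
    // The first deletion consumes one entry, leaving [ARCHIVE], so the second
    // pass may again delete only an ARCHIVE replica.
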
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStorageInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStorageInfo.java
index f7bab3ced6..8d2104bfb1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStorageInfo.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStorageInfo.java
@@ -292,6 +292,26 @@ public String toString() {
     return "[" + storageType + "]" + storageID + ":" + state;
   }
 
+  static Iterable<StorageType> toStorageTypes(
+      final Iterable<DatanodeStorageInfo> infos) {
+    return new Iterable<StorageType>() {
+      @Override
+      public Iterator<StorageType> iterator() {
+        return new Iterator<StorageType>() {
+          final Iterator<DatanodeStorageInfo> i = infos.iterator();
+          @Override
+          public boolean hasNext() {return i.hasNext();}
+          @Override
+          public StorageType next() {return i.next().getStorageType();}
+          @Override
+          public void remove() {
+            throw new UnsupportedOperationException();
+          }
+        };
+      }
+    };
+  }
+
   /** @return the first {@link DatanodeStorageInfo} corresponding to
    *          the given datanode
    */
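
toStorageTypes() is the same anonymous-iterator adapter that the previous hunk deletes from BlockPlacementPolicyDefault, hoisted here so that BlockManager and the placement policy can share it. It is a lazy view: nothing is copied, next() reads the type off the underlying DatanodeStorageInfo on demand, and remove() is disabled so callers cannot mutate the replica collection through the view. A short usage sketch mirroring the new call sites; the replicas variable is an assumed local:

    // replicas is any Iterable<DatanodeStorageInfo>, e.g. the nonExcess
    // collection in BlockManager.chooseExcessReplicates().
    Iterable<StorageType> types = DatanodeStorageInfo.toStorageTypes(replicas);
    for (StorageType t : types) {
      System.out.println(t);  // e.g. DISK, DISK, ARCHIVE
    }
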
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java
index df6ee82c00..869722c92d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java
@@ -46,6 +46,7 @@
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.LogVerificationAppender;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.StorageType;
 import org.apache.hadoop.hdfs.TestBlockStoragePolicy;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
@@ -933,8 +934,10 @@ public void testChooseReplicaToDelete() throws Exception {
     // replica nodes, while storages[2] and dataNodes[5] are in second set.
     assertEquals(2, first.size());
     assertEquals(2, second.size());
+    List<StorageType> excessTypes = new ArrayList<StorageType>();
+    excessTypes.add(StorageType.DEFAULT);
     DatanodeStorageInfo chosen = replicator.chooseReplicaToDelete(
-        null, null, (short)3, first, second);
+        null, null, (short)3, first, second, excessTypes);
     // Within first set, storages[1] with less free space
     assertEquals(chosen, storages[1]);
 
@@ -942,8 +945,9 @@ public void testChooseReplicaToDelete() throws Exception {
     assertEquals(0, first.size());
     assertEquals(3, second.size());
     // Within second set, storages[5] with less free space
+    excessTypes.add(StorageType.DEFAULT);
     chosen = replicator.chooseReplicaToDelete(
-        null, null, (short)2, first, second);
+        null, null, (short)2, first, second, excessTypes);
     assertEquals(chosen, storages[5]);
   }
 
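
Because each successful chooseReplicaToDelete() call removes the winner's storage type from excessTypes, one StorageType.DEFAULT entry is the budget for exactly one deletion, and the test must replenish the list before every call. The same pattern repeats in TestReplicationPolicyWithNodeGroup below:

    List<StorageType> excessTypes = new ArrayList<StorageType>();
    excessTypes.add(StorageType.DEFAULT);  // budget for the first deletion
    chosen = replicator.chooseReplicaToDelete(
        null, null, (short)3, first, second, excessTypes);
    // excessTypes is now empty; re-add before asking for another deletion:
    excessTypes.add(StorageType.DEFAULT);
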
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyWithNodeGroup.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyWithNodeGroup.java
index eb4ab1cbc0..526c490422 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyWithNodeGroup.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyWithNodeGroup.java
@@ -36,6 +36,7 @@
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.StorageType;
 import org.apache.hadoop.hdfs.TestBlockStoragePolicy;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
@@ -612,8 +613,10 @@ public void testChooseReplicaToDelete() throws Exception {
         replicaList, rackMap, first, second);
     assertEquals(3, first.size());
     assertEquals(1, second.size());
+    List<StorageType> excessTypes = new ArrayList<StorageType>();
+    excessTypes.add(StorageType.DEFAULT);
     DatanodeStorageInfo chosen = replicator.chooseReplicaToDelete(
-        null, null, (short)3, first, second);
+        null, null, (short)3, first, second, excessTypes);
     // Within first set {dataNodes[0], dataNodes[1], dataNodes[2]},
     // dataNodes[0] and dataNodes[1] are in the same nodegroup,
     // but dataNodes[1] is chosen as less free space
@@ -624,16 +627,18 @@ public void testChooseReplicaToDelete() throws Exception {
     assertEquals(1, second.size());
     // Within first set {dataNodes[0], dataNodes[2]}, dataNodes[2] is chosen
     // as less free space
+    excessTypes.add(StorageType.DEFAULT);
     chosen = replicator.chooseReplicaToDelete(
-        null, null, (short)2, first, second);
+        null, null, (short)2, first, second, excessTypes);
     assertEquals(chosen, storages[2]);
 
     replicator.adjustSetsWithChosenReplica(rackMap, first, second, chosen);
     assertEquals(0, first.size());
     assertEquals(2, second.size());
     // Within second set, dataNodes[5] with less free space
+    excessTypes.add(StorageType.DEFAULT);
     chosen = replicator.chooseReplicaToDelete(
-        null, null, (short)1, first, second);
+        null, null, (short)1, first, second, excessTypes);
     assertEquals(chosen, storages[5]);
   }
 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java
index 6d4a4c84cc..66f301b224 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java
@@ -38,6 +38,7 @@
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.apache.hadoop.hdfs.StorageType;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
@@ -588,7 +589,8 @@ public RandomDeleterPolicy() {
     public DatanodeStorageInfo chooseReplicaToDelete(BlockCollection inode,
         Block block, short replicationFactor,
         Collection<DatanodeStorageInfo> first,
-        Collection<DatanodeStorageInfo> second) {
+        Collection<DatanodeStorageInfo> second,
+        List<StorageType> excessTypes) {
 
       Collection<DatanodeStorageInfo> chooseFrom = !first.isEmpty() ? first : second;