HDFS-9034. StorageTypeStats Metric should not count failed storage. Contributed by Surendra Singh Lilhore.

Committed by Benoy Antony on 2015-12-22 15:28:17 -08:00
parent 867048c3e4
commit df83230948
4 changed files with 81 additions and 7 deletions

View File

@@ -963,6 +963,7 @@ nodes with its data cleared (or user can just remove the StorageID
// no need to update its timestamp
// because its is done when the descriptor is created
heartbeatManager.addDatanode(nodeDescr);
heartbeatManager.updateDnStat(nodeDescr);
incrementVersionCount(nodeReg.getSoftwareVersion());
startDecommissioningIfExcluded(nodeDescr);
success = true;

View File

@@ -19,6 +19,7 @@
import org.apache.hadoop.fs.StorageType;
import org.apache.hadoop.hdfs.DFSUtilClient;
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
import java.util.EnumMap;
import java.util.HashSet;
@@ -61,9 +62,11 @@ synchronized void add(final DatanodeDescriptor node) {
}
Set<StorageType> storageTypes = new HashSet<>();
for (DatanodeStorageInfo storageInfo : node.getStorageInfos()) {
if (storageInfo.getState() != DatanodeStorage.State.FAILED) {
statsMap.addStorage(storageInfo, node);
storageTypes.add(storageInfo.getStorageType());
}
}
for (StorageType storageType : storageTypes) {
statsMap.addNode(storageType, node);
}
@@ -86,9 +89,11 @@ synchronized void subtract(final DatanodeDescriptor node) {
}
Set<StorageType> storageTypes = new HashSet<>();
for (DatanodeStorageInfo storageInfo : node.getStorageInfos()) {
if (storageInfo.getState() != DatanodeStorage.State.FAILED) {
statsMap.subtractStorage(storageInfo, node);
storageTypes.add(storageInfo.getStorageType());
}
}
for (StorageType storageType : storageTypes) {
statsMap.subtractNode(storageType, node);
}
@@ -202,10 +207,12 @@ private void subtractStorage(final DatanodeStorageInfo info,
/**
 * Decrements the in-service node count for {@code storageType} and, when
 * the count reaches zero, removes the entry from {@code storageTypeStatsMap}
 * so metrics no longer report a storage type with no live nodes (HDFS-9034).
 *
 * @param storageType the storage type whose node count is decremented
 * @param node the datanode being removed from the stats
 */
private void subtractNode(StorageType storageType,
    final DatanodeDescriptor node) {
  // Single lookup; the stripped diff rendering had duplicated this
  // declaration (old + new line), which would not compile.
  StorageTypeStats storageTypeStats = storageTypeStatsMap.get(storageType);
  if (storageTypeStats != null) {
    storageTypeStats.subtractNode(node);
    if (storageTypeStats.getNodesInService() == 0) {
      // Last in-service node of this type is gone: drop the entry
      // entirely rather than keeping a zeroed record.
      storageTypeStatsMap.remove(storageType);
    }
  }
}
}

View File

@@ -203,6 +203,7 @@ synchronized void register(final DatanodeDescriptor d) {
//update its timestamp
d.updateHeartbeatState(StorageReport.EMPTY_ARRAY, 0L, 0L, 0, 0, null);
stats.add(d);
}
}
@@ -212,11 +213,14 @@ synchronized DatanodeDescriptor[] getDatanodes() {
/**
 * Adds the datanode to the live-node list and marks it alive.
 * <p>
 * Stats accounting is intentionally NOT done here: per HDFS-9034 the
 * caller invokes {@code updateDnStat(d)} separately (see the
 * registerDatanode change in this commit), so calling {@code stats.add(d)}
 * here as well would double-count the node. The stripped diff rendering
 * had left the removed {@code stats.add(d);} line in place.
 *
 * @param d descriptor of the datanode to track
 */
synchronized void addDatanode(final DatanodeDescriptor d) {
  datanodes.add(d);
  d.setAlive(true);
}
/**
 * Adds {@code node} to the aggregate datanode statistics ({@code stats}).
 * Kept separate from {@code addDatanode} so callers control when a node's
 * storages are counted.
 *
 * @param node descriptor whose figures are added to the cluster stats
 */
void updateDnStat(final DatanodeDescriptor node) {
  stats.add(node);
}
synchronized void removeDatanode(DatanodeDescriptor node) {
if (node.isAlive()) {
stats.subtract(node);

View File

@@ -18,10 +18,12 @@
package org.apache.hadoop.hdfs.server.blockmanagement;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
@@ -29,9 +31,12 @@
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.StorageType;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@@ -143,4 +148,61 @@ public void testStorageTypeStatsJMX() throws Exception {
assertTrue(typesPresent.contains("DISK"));
assertTrue(typesPresent.contains("RAM_DISK"));
}
/**
 * HDFS-9034 regression test: a storage type whose volumes have all failed
 * must disappear from StorageTypeStats, and must reappear with the correct
 * node count once the volumes are restored and the datanodes restarted.
 */
@Test
public void testStorageTypeStatsWhenStorageFailed() throws Exception {
  DFSTestUtil.createFile(cluster.getFileSystem(),
      new Path("/blockStatsFile1"), 1024, (short) 1, 0L);
  Map<StorageType, StorageTypeStats> storageTypeStatsMap = cluster
      .getNamesystem().getBlockManager().getStorageTypeStats();

  // Baseline with all storages healthy. Counts assume the test fixture
  // starts 6 RAM_DISK, 3 DISK and 3 ARCHIVE nodes in service — see the
  // cluster setup elsewhere in this class.
  StorageTypeStats storageTypeStats = storageTypeStatsMap
      .get(StorageType.RAM_DISK);
  assertEquals(6, storageTypeStats.getNodesInService());
  storageTypeStats = storageTypeStatsMap.get(StorageType.DISK);
  assertEquals(3, storageTypeStats.getNodesInService());
  storageTypeStats = storageTypeStatsMap.get(StorageType.ARCHIVE);
  assertEquals(3, storageTypeStats.getNodesInService());

  // Fail one volume on each of the first three datanodes. The
  // "data" + (3 * dnIndex + 2) naming assumes 3 data dirs per datanode
  // with the targeted volume at offset 2 — TODO confirm against the
  // fixture's dfs.datanode.data.dir layout.
  String dataDir = cluster.getDataDirectory();
  File dn1ArcVol1 = new File(dataDir, "data" + (3 * 0 + 2));
  File dn2ArcVol1 = new File(dataDir, "data" + (3 * 1 + 2));
  File dn3ArcVol1 = new File(dataDir, "data" + (3 * 2 + 2));
  DataNodeTestUtils.injectDataDirFailure(dn1ArcVol1);
  DataNodeTestUtils.injectDataDirFailure(dn2ArcVol1);
  DataNodeTestUtils.injectDataDirFailure(dn3ArcVol1);

  // With every DISK volume failed, block placement must fail.
  try {
    DFSTestUtil.createFile(cluster.getFileSystem(), new Path(
        "/blockStatsFile2"), 1024, (short) 1, 0L);
    // Typo fix: original message read "becuase".
    fail("Should throw exception, because no DISK storage available");
  } catch (Exception e) {
    assertTrue(e.getMessage().contains(
        "could only be replicated to 0 nodes instead"));
  }

  // wait for heartbeat so the namenode observes the failed storages
  Thread.sleep(6000);
  storageTypeStatsMap = cluster.getNamesystem().getBlockManager()
      .getStorageTypeStats();
  assertFalse("StorageTypeStatsMap should not contain DISK Storage type",
      storageTypeStatsMap.containsKey(StorageType.DISK));

  // Restore the volumes and restart the affected datanodes.
  DataNodeTestUtils.restoreDataDirFromFailure(dn1ArcVol1);
  DataNodeTestUtils.restoreDataDirFromFailure(dn2ArcVol1);
  DataNodeTestUtils.restoreDataDirFromFailure(dn3ArcVol1);
  for (int i = 0; i < 3; i++) {
    // NOTE(review): restarting index 0 on every iteration assumes
    // MiniDFSCluster removes the stopped node from its list and appends
    // the restarted one, so three passes restart all three nodes — confirm.
    cluster.restartDataNode(0, true);
  }

  // wait for heartbeat so the restored storages are re-counted
  Thread.sleep(6000);
  storageTypeStatsMap = cluster.getNamesystem().getBlockManager()
      .getStorageTypeStats();
  storageTypeStats = storageTypeStatsMap.get(StorageType.RAM_DISK);
  assertEquals(6, storageTypeStats.getNodesInService());
  storageTypeStats = storageTypeStatsMap.get(StorageType.DISK);
  assertEquals(3, storageTypeStats.getNodesInService());
  storageTypeStats = storageTypeStatsMap.get(StorageType.ARCHIVE);
  assertEquals(3, storageTypeStats.getNodesInService());
}
}