From 78af6cdc5359404139665d81447f28d26b7bb43b Mon Sep 17 00:00:00 2001 From: Manoj Govindassamy Date: Tue, 10 Oct 2017 14:23:29 -0700 Subject: [PATCH] HDFS-12573. Divide the total blocks metrics into replicated and erasure coded. Contributed by Takanobu Asanuma. --- .../server/blockmanagement/BlockManager.java | 10 ++ .../server/blockmanagement/BlocksMap.java | 40 ++++++- .../hdfs/server/namenode/FSNamesystem.java | 12 ++ .../namenode/metrics/ECBlockGroupsMBean.java | 5 + .../metrics/ReplicatedBlocksMBean.java | 5 + .../server/namenode/TestNameNodeMXBean.java | 107 +++++++++++++++++- 6 files changed, 177 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 0545bb2a1e..386ba277ef 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -256,6 +256,11 @@ public long getPendingDeletionReplicatedBlocks() { return invalidateBlocks.getBlocks(); } + /** Used by metrics. */ + public long getTotalReplicatedBlocks() { + return blocksMap.getReplicatedBlocks(); + } + /** Used by metrics. */ public long getLowRedundancyECBlockGroups() { return neededReconstruction.getLowRedundancyECBlockGroups(); @@ -276,6 +281,11 @@ public long getPendingDeletionECBlocks() { return invalidateBlocks.getECBlocks(); } + /** Used by metrics. */ + public long getTotalECBlockGroups() { + return blocksMap.getECBlockGroups(); + } + /** * redundancyRecheckInterval is how often namenode checks for new * reconstruction work. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlocksMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlocksMap.java index f7cde90872..6f13da999c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlocksMap.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlocksMap.java @@ -19,6 +19,7 @@ import java.util.Collections; import java.util.Iterator; +import java.util.concurrent.atomic.LongAdder; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.server.namenode.INodeId; @@ -37,6 +38,9 @@ class BlocksMap { private GSet blocks; + private final LongAdder totalReplicatedBlocks = new LongAdder(); + private final LongAdder totalECBlockGroups = new LongAdder(); + BlocksMap(int capacity) { // Use 2% of total memory to size the GSet capacity this.capacity = capacity; @@ -65,6 +69,8 @@ void close() { void clear() { if (blocks != null) { blocks.clear(); + totalReplicatedBlocks.reset(); + totalECBlockGroups.reset(); } } @@ -76,6 +82,7 @@ BlockInfo addBlockCollection(BlockInfo b, BlockCollection bc) { if (info != b) { info = b; blocks.put(info); + incrementBlockStat(info); } info.setBlockCollectionId(bc.getId()); return info; @@ -88,8 +95,10 @@ BlockInfo addBlockCollection(BlockInfo b, BlockCollection bc) { */ void removeBlock(Block block) { BlockInfo blockInfo = blocks.remove(block); - if (blockInfo == null) + if (blockInfo == null) { return; + } + decrementBlockStat(block); assert blockInfo.getBlockCollectionId() == INodeId.INVALID_INODE_ID; final int size = blockInfo.isStriped() ? 
@@ -166,6 +175,7 @@ boolean removeNode(Block b, DatanodeDescriptor node) { if (info.hasNoStorage() // no datanodes left && info.isDeleted()) { // does not belong to a file blocks.remove(b); // remove block from the map + decrementBlockStat(b); } return removed; } @@ -196,4 +206,32 @@ Iterable getBlocks() { int getCapacity() { return capacity; } + + private void incrementBlockStat(Block block) { + if (BlockIdManager.isStripedBlockID(block.getBlockId())) { + totalECBlockGroups.increment(); + } else { + totalReplicatedBlocks.increment(); + } + } + + private void decrementBlockStat(Block block) { + if (BlockIdManager.isStripedBlockID(block.getBlockId())) { + totalECBlockGroups.decrement(); + assert totalECBlockGroups.longValue() >= 0 : + "Total number of ec block groups should be non-negative"; + } else { + totalReplicatedBlocks.decrement(); + assert totalReplicatedBlocks.longValue() >= 0 : + "Total number of replicated blocks should be non-negative"; + } + } + + long getReplicatedBlocks() { + return totalReplicatedBlocks.longValue(); + } + + long getECBlockGroups() { + return totalECBlockGroups.longValue(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index dc50808a95..e8d7161d15 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -4719,6 +4719,12 @@ public long getPendingDeletionReplicatedBlocks() { return blockManager.getPendingDeletionReplicatedBlocks(); } + @Override // ReplicatedBlocksMBean + @Metric({"TotalReplicatedBlocks", "Total number of replicated blocks"}) + public long getTotalReplicatedBlocks() { + return blockManager.getTotalReplicatedBlocks(); + } + @Override // ECBlockGroupsMBean 
@Metric({"LowRedundancyECBlockGroups", "Number of erasure coded block " + "groups with low redundancy"}) @@ -4754,6 +4760,12 @@ public long getPendingDeletionECBlocks() { return blockManager.getPendingDeletionECBlocks(); } + @Override // ECBlockGroupsMBean + @Metric({"TotalECBlockGroups", "Total number of erasure coded block groups"}) + public long getTotalECBlockGroups() { + return blockManager.getTotalECBlockGroups(); + } + @Override public long getBlockDeletionStartTime() { return startTime + blockManager.getStartupDelayBlockDeletionInMs(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/ECBlockGroupsMBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/ECBlockGroupsMBean.java index 474f3edb97..808fc62880 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/ECBlockGroupsMBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/ECBlockGroupsMBean.java @@ -56,4 +56,9 @@ public interface ECBlockGroupsMBean { * Return count of erasure coded blocks that are pending deletion. */ long getPendingDeletionECBlocks(); + + /** + * Return total number of erasure coded block groups. + */ + long getTotalECBlockGroups(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/ReplicatedBlocksMBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/ReplicatedBlocksMBean.java index e2c924e56f..a20dd4c0bb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/ReplicatedBlocksMBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/ReplicatedBlocksMBean.java @@ -60,4 +60,9 @@ public interface ReplicatedBlocksMBean { * Return count of blocks that are pending deletion. 
*/ long getPendingDeletionReplicatedBlocks(); + + /** + * Return total number of replicated blocks. + */ + long getTotalReplicatedBlocks(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java index 93a75253ce..9c165d8fdb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java @@ -731,7 +731,8 @@ public void testVerifyMissingBlockGroupsMetrics() throws Exception { DistributedFileSystem fs = null; try { Configuration conf = new HdfsConfiguration(); - int dataBlocks = StripedFileTestUtil.getDefaultECPolicy().getNumDataUnits(); + int dataBlocks = StripedFileTestUtil.getDefaultECPolicy() + .getNumDataUnits(); int parityBlocks = StripedFileTestUtil.getDefaultECPolicy().getNumParityUnits(); int cellSize = StripedFileTestUtil.getDefaultECPolicy().getCellSize(); @@ -860,4 +861,108 @@ public Boolean get() { } } } + + @Test + public void testTotalBlocksMetrics() throws Exception { + MiniDFSCluster cluster = null; + FSNamesystem namesystem = null; + DistributedFileSystem fs = null; + try { + Configuration conf = new HdfsConfiguration(); + int dataBlocks = StripedFileTestUtil.getDefaultECPolicy() + .getNumDataUnits(); + int parityBlocks = + StripedFileTestUtil.getDefaultECPolicy().getNumParityUnits(); + int totalSize = dataBlocks + parityBlocks; + int cellSize = StripedFileTestUtil.getDefaultECPolicy().getCellSize(); + int stripesPerBlock = 2; + int blockSize = stripesPerBlock * cellSize; + conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize); + + cluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(totalSize).build(); + namesystem = cluster.getNamesystem(); + fs = cluster.getFileSystem(); + 
fs.enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); + verifyTotalBlocksMetrics(0L, 0L, namesystem.getTotalBlocks()); + + // create small file + Path replDirPath = new Path("/replicated"); + Path replFileSmall = new Path(replDirPath, "replfile_small"); + final short factor = 3; + DFSTestUtil.createFile(fs, replFileSmall, blockSize, factor, 0); + DFSTestUtil.waitReplication(fs, replFileSmall, factor); + + Path ecDirPath = new Path("/striped"); + fs.mkdir(ecDirPath, FsPermission.getDirDefault()); + fs.getClient().setErasureCodingPolicy(ecDirPath.toString(), + StripedFileTestUtil.getDefaultECPolicy().getName()); + Path ecFileSmall = new Path(ecDirPath, "ecfile_small"); + final int smallLength = cellSize * dataBlocks; + final byte[] smallBytes = StripedFileTestUtil.generateBytes(smallLength); + DFSTestUtil.writeFile(fs, ecFileSmall, smallBytes); + verifyTotalBlocksMetrics(1L, 1L, namesystem.getTotalBlocks()); + + // create large file + Path replFileLarge = new Path(replDirPath, "replfile_large"); + DFSTestUtil.createFile(fs, replFileLarge, 2 * blockSize, factor, 0); + DFSTestUtil.waitReplication(fs, replFileLarge, factor); + + Path ecFileLarge = new Path(ecDirPath, "ecfile_large"); + final int largeLength = blockSize * totalSize + smallLength; + final byte[] largeBytes = StripedFileTestUtil.generateBytes(largeLength); + DFSTestUtil.writeFile(fs, ecFileLarge, largeBytes); + verifyTotalBlocksMetrics(3L, 3L, namesystem.getTotalBlocks()); + + // delete replicated files + fs.delete(replDirPath, true); + verifyTotalBlocksMetrics(0L, 3L, namesystem.getTotalBlocks()); + + // delete ec files + fs.delete(ecDirPath, true); + verifyTotalBlocksMetrics(0L, 0L, namesystem.getTotalBlocks()); + } finally { + if (fs != null) { + try { + fs.close(); + } catch (Exception e) { + throw e; + } + } + if (namesystem != null) { + try { + namesystem.close(); + } catch (Exception e) { + throw e; + } + } + if (cluster != null) { + cluster.shutdown(); + } + } + } +
+ void verifyTotalBlocksMetrics(long expectedTotalReplicatedBlocks, + long expectedTotalECBlockGroups, long actualTotalBlocks) + throws Exception { + long expectedTotalBlocks = expectedTotalReplicatedBlocks + + expectedTotalECBlockGroups; + assertEquals("Unexpected total blocks!", expectedTotalBlocks, + actualTotalBlocks); + + MBeanServer mbs = ManagementFactory.getPlatformMBeanServer(); + ObjectName replStateMBeanName = new ObjectName( + "Hadoop:service=NameNode,name=ReplicatedBlocksState"); + ObjectName ecBlkGrpStateMBeanName = new ObjectName( + "Hadoop:service=NameNode,name=ECBlockGroupsState"); + Long totalReplicaBlocks = (Long) mbs.getAttribute(replStateMBeanName, + "TotalReplicatedBlocks"); + Long totalECBlockGroups = (Long) mbs.getAttribute(ecBlkGrpStateMBeanName, + "TotalECBlockGroups"); + assertEquals("Unexpected total replicated blocks!", + expectedTotalReplicatedBlocks, totalReplicaBlocks.longValue()); + assertEquals("Unexpected total ec block groups!", + expectedTotalECBlockGroups, totalECBlockGroups.longValue()); + } }