diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index e73cf11f25..25505fa501 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -292,6 +292,9 @@ Release 2.7.0 - UNRELEASED
 
     HDFS-6824. Additional user documentation for HDFS encryption. (wang)
 
+    HDFS-7165. Separate block metrics for files with replication count 1.
+    (Zhe Zhang via wang)
+
   OPTIMIZATIONS
 
   BUG FIXES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
index 68e355aa93..1fb1af1130 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
@@ -2355,6 +2355,16 @@ public long getMissingBlocksCount() throws IOException {
     return namenode.getStats()[ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX];
   }
 
+  /**
+   * Returns count of blocks with replication factor 1 whose only
+   * replica has been lost.
+   * @throws IOException
+   */
+  public long getMissingReplOneBlocksCount() throws IOException {
+    return namenode.getStats()[ClientProtocol.
+        GET_STATS_MISSING_REPL_ONE_BLOCKS_IDX];
+  }
+
   /**
    * Returns count of blocks with one or more replicas missing.
    * @throws IOException
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
index cd000e23a2..1ad7ca36c8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
@@ -930,6 +930,16 @@ public long getMissingBlocksCount() throws IOException {
     return dfs.getMissingBlocksCount();
   }
 
+  /**
+   * Returns count of blocks with replication factor 1 whose only
+   * replica has been lost.
+   *
+   * @throws IOException
+   */
+  public long getMissingReplOneBlocksCount() throws IOException {
+    return dfs.getMissingReplOneBlocksCount();
+  }
+
   /**
    * Returns count of blocks with one or more replicas missing.
    *
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
index d29d2ebdcc..6506bef52e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
@@ -652,6 +652,7 @@ public void renewLease(String clientName) throws AccessControlException,
   public int GET_STATS_UNDER_REPLICATED_IDX = 3;
   public int GET_STATS_CORRUPT_BLOCKS_IDX = 4;
   public int GET_STATS_MISSING_BLOCKS_IDX = 5;
+  public int GET_STATS_MISSING_REPL_ONE_BLOCKS_IDX = 6;
 
   /**
    * Get a set of statistics about the filesystem.
@@ -663,7 +664,8 @@ public void renewLease(String clientName) throws AccessControlException,
    * <li> [3] contains number of under replicated blocks in the system.</li>
    * <li> [4] contains number of blocks with a corrupt replica. </li>
    * <li> [5] contains number of blocks without any good replicas left. </li>
-   * <li> [6] contains the total used space of the block pool. </li>
+   * <li> [6] contains number of blocks which have replication factor
+   *          1 and have lost the only replica. </li>
    * </ul>
    * Use public constants like {@link #GET_STATS_CAPACITY_IDX} in place of
    * actual numbers to index into the array.
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java
index 6470a1c91e..e48e85f255 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java
@@ -1545,13 +1545,15 @@ public static DirectoryListingProto convert(DirectoryListing d) {
   }
 
   public static long[] convert(GetFsStatsResponseProto res) {
-    long[] result = new long[6];
+    long[] result = new long[7];
     result[ClientProtocol.GET_STATS_CAPACITY_IDX] = res.getCapacity();
     result[ClientProtocol.GET_STATS_USED_IDX] = res.getUsed();
     result[ClientProtocol.GET_STATS_REMAINING_IDX] = res.getRemaining();
     result[ClientProtocol.GET_STATS_UNDER_REPLICATED_IDX] = res.getUnderReplicated();
     result[ClientProtocol.GET_STATS_CORRUPT_BLOCKS_IDX] = res.getCorruptBlocks();
     result[ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX] = res.getMissingBlocks();
+    result[ClientProtocol.GET_STATS_MISSING_REPL_ONE_BLOCKS_IDX] =
+        res.getMissingReplOneBlocks();
     return result;
   }
 
@@ -1573,6 +1575,9 @@ public static GetFsStatsResponseProto convert(long[] fsStats) {
     if (fsStats.length >= ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX + 1)
       result.setMissingBlocks(
           fsStats[ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX]);
+    if (fsStats.length >= ClientProtocol.GET_STATS_MISSING_REPL_ONE_BLOCKS_IDX + 1)
+      result.setMissingReplOneBlocks(
+          fsStats[ClientProtocol.GET_STATS_MISSING_REPL_ONE_BLOCKS_IDX]);
     return result.build();
   }
 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index c11a79a41e..98388d41bf 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -3438,6 +3438,11 @@ public long getMissingBlocksCount() {
     return this.neededReplications.getCorruptBlockSize();
   }
 
+  public long getMissingReplOneBlocksCount() {
+    // not locking
+    return this.neededReplications.getCorruptReplOneBlockSize();
+  }
+
   public BlockInfo addBlockCollection(BlockInfo block, BlockCollection bc) {
     return blocksMap.addBlockCollection(block, bc);
   }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
index 66a7ca57b5..9286609399 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java
@@ -179,7 +179,7 @@ public synchronized long[] getStats() {
                        -1L,
                        -1L,
                        -1L,
-                       getBlockPoolUsed()};
+                       -1L};
   }
 
   @Override
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/UnderReplicatedBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/UnderReplicatedBlocks.java
index 549f05594c..7d0eea4c07 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/UnderReplicatedBlocks.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/UnderReplicatedBlocks.java
@@ -81,6 +81,9 @@ class UnderReplicatedBlocks implements Iterable<Block> {
   private final List<LightWeightLinkedSet<Block>> priorityQueues
       = new ArrayList<LightWeightLinkedSet<Block>>(LEVEL);
 
+  /** The number of corrupt blocks with replication factor 1 */
+  private int corruptReplOneBlocks = 0;
+
   /** Create an object. */
   UnderReplicatedBlocks() {
     for (int i = 0; i < LEVEL; i++) {
@@ -122,6 +125,11 @@ synchronized int getCorruptBlockSize() {
     return priorityQueues.get(QUEUE_WITH_CORRUPT_BLOCKS).size();
   }
 
+  /** Return the number of corrupt blocks with replication factor 1 */
+  synchronized int getCorruptReplOneBlockSize() {
+    return corruptReplOneBlocks;
+  }
+
   /** Check if a block is in the neededReplication queue */
   synchronized boolean contains(Block block) {
     for(LightWeightLinkedSet<Block> set : priorityQueues) {
@@ -183,6 +191,10 @@ synchronized boolean add(Block block,
     int priLevel = getPriority(block, curReplicas, decomissionedReplicas,
                                expectedReplicas);
     if(priorityQueues.get(priLevel).add(block)) {
+      if (priLevel == QUEUE_WITH_CORRUPT_BLOCKS &&
+          expectedReplicas == 1) {
+        corruptReplOneBlocks++;
+      }
       if(NameNode.blockStateChangeLog.isDebugEnabled()) {
         NameNode.blockStateChangeLog.debug(
           "BLOCK* NameSystem.UnderReplicationBlock.add:"
@@ -205,7 +217,16 @@ synchronized boolean remove(Block block,
     int priLevel = getPriority(block, oldReplicas,
                                decommissionedReplicas,
                                oldExpectedReplicas);
-    return remove(block, priLevel);
+    boolean removedBlock = remove(block, priLevel);
+    if (priLevel == QUEUE_WITH_CORRUPT_BLOCKS &&
+        oldExpectedReplicas == 1 &&
+        removedBlock) {
+      corruptReplOneBlocks--;
+      assert corruptReplOneBlocks >= 0 :
+          "Number of corrupt blocks with replication factor 1 " +
+          "should be non-negative";
+    }
+    return removedBlock;
   }
 
   /**
@@ -299,6 +320,18 @@ synchronized void update(Block block, int curReplicas,
                          + " at priority level " + curPri);
       }
     }
+    if (oldPri != curPri || expectedReplicasDelta != 0) {
+      // corruptReplOneBlocks could possibly change
+      if (curPri == QUEUE_WITH_CORRUPT_BLOCKS &&
+          curExpectedReplicas == 1) {
+        // add a new corrupt block with replication factor 1
+        corruptReplOneBlocks++;
+      } else if (oldPri == QUEUE_WITH_CORRUPT_BLOCKS &&
+          curExpectedReplicas - expectedReplicasDelta == 1) {
+        // remove an existing corrupt block with replication factor 1
+        corruptReplOneBlocks--;
+      }
+    }
   }
 
   /**
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 8e1ea7934c..c42978c83a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -5298,6 +5298,13 @@ public long getMissingBlocksCount() {
     // not locking
     return blockManager.getMissingBlocksCount();
   }
+
+  @Metric({"MissingReplOneBlocks", "Number of missing blocks " +
+      "with replication factor 1"})
+  public long getMissingReplOneBlocksCount() {
+    // not locking
+    return blockManager.getMissingReplOneBlocksCount();
+  }
 
   @Metric({"ExpiredHeartbeats", "Number of expired heartbeats"})
   public int getExpiredHeartbeats() {
@@ -5339,6 +5346,8 @@ long[] getStats() {
     stats[ClientProtocol.GET_STATS_UNDER_REPLICATED_IDX] = getUnderReplicatedBlocks();
     stats[ClientProtocol.GET_STATS_CORRUPT_BLOCKS_IDX] = getCorruptReplicaBlocks();
     stats[ClientProtocol.GET_STATS_MISSING_BLOCKS_IDX] = getMissingBlocksCount();
+    stats[ClientProtocol.GET_STATS_MISSING_REPL_ONE_BLOCKS_IDX] =
+        getMissingReplOneBlocksCount();
     return stats;
   }
 
@@ -7605,6 +7614,11 @@ public long getNumberOfMissingBlocks() {
     return getMissingBlocksCount();
   }
 
+  @Override // NameNodeMXBean
+  public long getNumberOfMissingBlocksWithReplicationFactorOne() {
+    return getMissingReplOneBlocksCount();
+  }
+
   @Override // NameNodeMXBean
   public int getThreads() {
     return ManagementFactory.getThreadMXBean().getThreadCount();
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java
index c9b4d55f46..3c317fda7d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java
@@ -147,10 +147,19 @@ public interface NameNodeMXBean {
   /**
    * Gets the total number of missing blocks on the cluster
    *
-   * @return the total number of files and blocks on the cluster
+   * @return the total number of missing blocks on the cluster
    */
   public long getNumberOfMissingBlocks();
 
+  /**
+   * Gets the total number of missing blocks on the cluster with
+   * replication factor 1
+   *
+   * @return the total number of missing blocks on the cluster with
+   * replication factor 1
+   */
+  public long getNumberOfMissingBlocksWithReplicationFactorOne();
+
   /**
    * Gets the number of threads.
    *
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
index 041bc3d26e..6beab46de3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
@@ -458,6 +458,8 @@ public void report(String[] argv, int i) throws IOException {
                        dfs.getCorruptBlocksCount());
     System.out.println("Missing blocks: " +
                        dfs.getMissingBlocksCount());
+    System.out.println("Missing blocks (with replication factor 1): " +
+                       dfs.getMissingReplOneBlocksCount());
 
     System.out.println();
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto
index 232c264a1a..ad758655a3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto
@@ -283,6 +283,7 @@ message GetFsStatsResponseProto {
   required uint64 under_replicated = 4;
   required uint64 corrupt_blocks = 5;
   required uint64 missing_blocks = 6;
+  optional uint64 missing_repl_one_blocks = 7;
 }
 
 enum DatanodeReportTypeProto {  // type of the datanode report
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMissingBlocksAlert.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMissingBlocksAlert.java
index 099d5472af..34b2188b47 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMissingBlocksAlert.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMissingBlocksAlert.java
@@ -77,7 +77,6 @@ public void testMissingBlocksAlert()
       Path corruptFile = new Path("/testMissingBlocks/corruptFile");
       DFSTestUtil.createFile(dfs, corruptFile, fileLen, (short)3, 0);
 
-
       // Corrupt the block
       ExtendedBlock block = DFSTestUtil.getFirstBlock(dfs, corruptFile);
       assertTrue(TestDatanodeBlockScanner.corruptReplica(block, 0));
@@ -120,6 +119,24 @@ public void testMissingBlocksAlert()
       Assert.assertEquals(0, (long)(Long) mbs.getAttribute(mxbeanName,
               "NumberOfMissingBlocks"));
+
+      Path replOneFile = new Path("/testMissingBlocks/replOneFile");
+      DFSTestUtil.createFile(dfs, replOneFile, fileLen, (short)1, 0);
+      ExtendedBlock replOneBlock = DFSTestUtil.getFirstBlock(
+          dfs, replOneFile);
+      assertTrue(TestDatanodeBlockScanner.corruptReplica(
+          replOneBlock, 0));
+
+      // read the file so that the corrupt block is reported to NN
+      in = dfs.open(replOneFile);
+      try {
+        in.readFully(new byte[fileLen]);
+      } catch (ChecksumException ignored) { // checksum error is expected.
+      }
+      in.close();
+      assertEquals(1, dfs.getMissingReplOneBlocksCount());
+      Assert.assertEquals(1, (long)(Long) mbs.getAttribute(mxbeanName,
+          "NumberOfMissingBlocksWithReplicationFactorOne"));
     } finally {
       if (cluster != null) {
         cluster.shutdown();
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestUnderReplicatedBlockQueues.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestUnderReplicatedBlockQueues.java
index 20c2541119..e87a043762 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestUnderReplicatedBlockQueues.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestUnderReplicatedBlockQueues.java
@@ -19,10 +19,14 @@
 package org.apache.hadoop.hdfs.server.blockmanagement;
 
 import org.apache.hadoop.hdfs.protocol.Block;
-import org.junit.Assert;
 import org.junit.Test;
 
-public class TestUnderReplicatedBlockQueues extends Assert {
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.fail;
+
+public class TestUnderReplicatedBlockQueues {
 
   /**
    * Test that adding blocks with different replication counts puts them
@@ -36,6 +40,7 @@ public void testBlockPriorities() throws Throwable {
     Block block2 = new Block(2);
     Block block_very_under_replicated = new Block(3);
     Block block_corrupt = new Block(4);
+    Block block_corrupt_repl_one = new Block(5);
 
     //add a block with a single entry
     assertAdded(queues, block1, 1, 0, 3);
@@ -64,6 +69,16 @@ public void testBlockPriorities() throws Throwable {
     assertInLevel(queues, block_very_under_replicated,
                   UnderReplicatedBlocks.QUEUE_VERY_UNDER_REPLICATED);
 
+    //insert a corrupt block with replication factor 1
+    assertAdded(queues, block_corrupt_repl_one, 0, 0, 1);
+    assertEquals(2, queues.getCorruptBlockSize());
+    assertEquals(1, queues.getCorruptReplOneBlockSize());
+    queues.update(block_corrupt_repl_one, 0, 0, 3, 0, 2);
+    assertEquals(0, queues.getCorruptReplOneBlockSize());
+    queues.update(block_corrupt, 0, 0, 1, 0, -2);
+    assertEquals(1, queues.getCorruptReplOneBlockSize());
+    queues.update(block_very_under_replicated, 0, 0, 1, -4, -24);
+    assertEquals(2, queues.getCorruptReplOneBlockSize());
   }
 
   private void assertAdded(UnderReplicatedBlocks queues,
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
index c7828b1ca0..1afbceaf90 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
@@ -295,6 +295,7 @@ public void testMissingBlock() throws Exception {
     MetricsRecordBuilder rb = getMetrics(NS_METRICS);
     assertGauge("UnderReplicatedBlocks", 1L, rb);
     assertGauge("MissingBlocks", 1L, rb);
+    assertGauge("MissingReplOneBlocks", 1L, rb);
     fs.delete(file, true);
     waitForDnMetricValue(NS_METRICS, "UnderReplicatedBlocks", 0L);
   }
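
Reviewer note (not part of the patch): a minimal sketch of how a client could read the new counter through the API added above. It assumes a running HDFS cluster configured as the process's default filesystem; the class name and setup are illustrative only.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;

// Hypothetical driver class, not included in HDFS-7165.
public class MissingReplOneReport {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    if (!(fs instanceof DistributedFileSystem)) {
      System.err.println("Default filesystem is not HDFS; check fs.defaultFS");
      return;
    }
    DistributedFileSystem dfs = (DistributedFileSystem) fs;
    // Both counters come back from the same ClientProtocol getStats() RPC;
    // the replication-factor-1 count is a subset of the missing total.
    System.out.println("Missing blocks: " + dfs.getMissingBlocksCount());
    System.out.println("Missing blocks (with replication factor 1): "
        + dfs.getMissingReplOneBlocksCount());
  }
}

After this patch the same figure also appears in the "hdfs dfsadmin -report" output, as the MissingReplOneBlocks NameNode metric, and as the NumberOfMissingBlocksWithReplicationFactorOne MXBean attribute, so monitoring can distinguish losses of deliberately un-replicated (replication factor 1) data from missing fully replicated blocks, which usually indicate a more serious failure.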