From 945b504c256d196c50634f61f3efe65a3b9a13a5 Mon Sep 17 00:00:00 2001 From: Inigo Goiri Date: Tue, 5 Mar 2019 17:39:52 -0800 Subject: [PATCH] HDFS-14326. Add CorruptFilesCount to JMX. Contributed by Danny Becker. --- .../metrics/NamenodeBeanMetrics.java | 5 +++ .../hdfs/server/namenode/FSNamesystem.java | 11 ++++- .../hdfs/server/namenode/NameNodeMXBean.java | 7 +++ .../namenode/TestListCorruptFileBlocks.java | 43 ++++++++++++++----- .../server/namenode/TestNameNodeMXBean.java | 5 +++ 5 files changed, 60 insertions(+), 11 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java index 0ca5f737dd..73765cf0df 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java @@ -335,6 +335,11 @@ public String getCorruptFiles() { return "N/A"; } + @Override + public int getCorruptFilesCount() { + return 0; + } + @Override public int getThreads() { return ManagementFactory.getThreadMXBean().getThreadCount(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index d0fdbac822..ba806d2b52 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -6350,6 +6350,15 @@ public HAContext getHAContext() { @Override // NameNodeMXBean public String getCorruptFiles() { + return JSON.toString(getCorruptFilesList()); + } + + @Override // NameNodeMXBean + public int getCorruptFilesCount() { + return getCorruptFilesList().size(); + } + + private List getCorruptFilesList() { List list = new ArrayList(); Collection corruptFileBlocks; try { @@ -6367,7 +6376,7 @@ public String getCorruptFiles() { } catch (IOException e) { LOG.warn("Get corrupt file blocks returned error", e); } - return JSON.toString(list); + return list; } @Override // NameNodeMXBean diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java index b8665ff923..69fa01023a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java @@ -296,6 +296,13 @@ public interface NameNodeMXBean { */ String getCorruptFiles(); + /** + * Get the length of the list of corrupt files. + * + * @return the length of the list of corrupt files. + */ + int getCorruptFilesCount(); + /** * Get the number of distinct versions of live datanodes. * diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java index 6bfc64d8f8..a197c00bce 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestListCorruptFileBlocks.java @@ -95,6 +95,7 @@ public void testListCorruptFilesCorruptedBlock() throws Exception { getNamesystem().listCorruptFileBlocks("/", null); assertEquals("Namenode has " + badFiles.size() + " corrupt files. Expecting None.", 0, badFiles.size()); + assertCorruptFilesCount(cluster, badFiles.size()); // Now deliberately corrupt one block String bpid = cluster.getNamesystem().getBlockPoolId(); @@ -128,8 +129,9 @@ public void testListCorruptFilesCorruptedBlock() throws Exception { // fetch bad file list from namenode. There should be one file. badFiles = namenode.getNamesystem().listCorruptFileBlocks("/", null); LOG.info("Namenode has bad files. " + badFiles.size()); - assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting 1.", - badFiles.size() == 1); + assertEquals("Namenode has " + badFiles.size() + " bad files. " + + "Expecting 1.", 1, badFiles.size()); + assertCorruptFilesCount(cluster, badFiles.size()); util.cleanup(fs, "/srcdat10"); } finally { if (cluster != null) { cluster.shutdown(); } @@ -176,6 +178,7 @@ public void testListCorruptFileBlocksInSafeMode() throws Exception { cluster.getNameNode().getNamesystem().listCorruptFileBlocks("/", null); assertEquals("Namenode has " + badFiles.size() + " corrupt files. Expecting None.", 0, badFiles.size()); + assertCorruptFilesCount(cluster, badFiles.size()); // Now deliberately corrupt one block File storageDir = cluster.getInstanceStorageDir(0, 0); @@ -211,8 +214,9 @@ public void testListCorruptFileBlocksInSafeMode() throws Exception { badFiles = cluster.getNameNode().getNamesystem(). listCorruptFileBlocks("/", null); LOG.info("Namenode has bad files. " + badFiles.size()); - assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting 1.", - badFiles.size() == 1); + assertEquals("Namenode has " + badFiles.size() + " bad files. " + + "Expecting 1.", 1, badFiles.size()); + assertCorruptFilesCount(cluster, badFiles.size()); // restart namenode cluster.restartNameNode(0); @@ -243,8 +247,9 @@ public void testListCorruptFileBlocksInSafeMode() throws Exception { badFiles = cluster.getNameNode().getNamesystem(). listCorruptFileBlocks("/", null); LOG.info("Namenode has bad files. " + badFiles.size()); - assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting 1.", - badFiles.size() == 1); + assertEquals("Namenode has " + badFiles.size() + " bad files. " + + "Expecting 1.", 1, badFiles.size()); + assertCorruptFilesCount(cluster, badFiles.size()); // check that we are still in safe mode assertTrue("Namenode is not in safe mode", @@ -288,7 +293,8 @@ public void testlistCorruptFileBlocks() throws Exception { Collection corruptFileBlocks = namenode.getNamesystem().listCorruptFileBlocks("/corruptData", null); int numCorrupt = corruptFileBlocks.size(); - assertTrue(numCorrupt == 0); + assertEquals(0, numCorrupt); + assertCorruptFilesCount(cluster, numCorrupt); // delete the blocks String bpid = cluster.getNamesystem().getBlockPoolId(); for (int i = 0; i < 4; i++) { @@ -328,6 +334,7 @@ public void testlistCorruptFileBlocks() throws Exception { // Validate we get all the corrupt files LOG.info("Namenode has bad files. " + numCorrupt); assertEquals(3, numCorrupt); + assertCorruptFilesCount(cluster, numCorrupt); // test the paging here FSNamesystem.CorruptFileBlockInfo[] cfb = corruptFileBlocks @@ -400,6 +407,7 @@ public void testlistCorruptFileBlocksDFS() throws Exception { dfs.listCorruptFileBlocks(new Path("/corruptData")); int numCorrupt = countPaths(corruptFileBlocks); assertEquals(0, numCorrupt); + assertCorruptFilesCount(cluster, numCorrupt); // delete the blocks String bpid = cluster.getNamesystem().getBlockPoolId(); // For loop through number of datadirectories per datanode (2) @@ -436,6 +444,7 @@ public void testlistCorruptFileBlocksDFS() throws Exception { // Validate we get all the corrupt files LOG.info("Namenode has bad files. " + numCorrupt); assertEquals(3, numCorrupt); + assertCorruptFilesCount(cluster, numCorrupt); util.cleanup(fs, "/corruptData"); util.cleanup(fs, "/goodData"); @@ -477,6 +486,7 @@ public void testMaxCorruptFiles() throws Exception { assertEquals( "Namenode has " + badFiles.size() + " corrupt files. Expecting none.", 0, badFiles.size()); + assertCorruptFilesCount(cluster, badFiles.size()); // Now deliberately blocks from all files final String bpid = cluster.getNamesystem().getBlockPoolId(); @@ -520,9 +530,9 @@ public void testMaxCorruptFiles() throws Exception { badFiles = namenode.getNamesystem(). listCorruptFileBlocks("/srcdat2", null); LOG.info("Namenode has bad files. " + badFiles.size()); - assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting " + - maxCorruptFileBlocks + ".", - badFiles.size() == maxCorruptFileBlocks); + assertEquals("Namenode has " + badFiles.size() + " bad files. " + + "Expecting " + maxCorruptFileBlocks + ".", maxCorruptFileBlocks, + badFiles.size()); CorruptFileBlockIterator iter = (CorruptFileBlockIterator) fs.listCorruptFileBlocks(new Path("/srcdat2")); @@ -566,6 +576,7 @@ public void testListCorruptFileBlocksOnRelativePath() throws Exception { .listCorruptFileBlocks(new Path("corruptData")); int numCorrupt = countPaths(corruptFileBlocks); assertEquals(0, numCorrupt); + assertCorruptFilesCount(cluster, numCorrupt); // delete the blocks String bpid = cluster.getNamesystem().getBlockPoolId(); @@ -608,4 +619,16 @@ public void testListCorruptFileBlocksOnRelativePath() throws Exception { } } } + + /** + * Asserts that the number of correct files is equal to the expected value. + * @param cluster where to get the number of corrupt files from + * @param expectedCorrupt the expected number of corrupt files + */ + private void assertCorruptFilesCount(MiniDFSCluster cluster, + int expectedCorrupt) { + FSNamesystem fs = cluster.getNameNode().getNamesystem(); + assertEquals("Incorrect number of corrupt files returned", expectedCorrupt, + fs.getCorruptFilesCount()); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java index 858032fb55..f6da894dc1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java @@ -222,6 +222,11 @@ public void testNameNodeMXBeanInfo() throws Exception { "CorruptFiles")); assertEquals("Bad value for CorruptFiles", fsn.getCorruptFiles(), corruptFiles); + // get attribute CorruptFilesCount + int corruptFilesCount = (int) (mbs.getAttribute(mxbeanName, + "CorruptFilesCount")); + assertEquals("Bad value for CorruptFilesCount", + fsn.getCorruptFilesCount(), corruptFilesCount); // get attribute NameDirStatuses String nameDirStatuses = (String) (mbs.getAttribute(mxbeanName, "NameDirStatuses"));