HDFS-14326. Add CorruptFilesCount to JMX. Contributed by Danny Becker.

This commit is contained in:
Inigo Goiri 2019-03-05 17:39:52 -08:00
parent 1ad5bfc53f
commit 945b504c25
5 changed files with 60 additions and 11 deletions

View File

@ -335,6 +335,11 @@ public String getCorruptFiles() {
return "N/A"; return "N/A";
} }
/**
 * Reports the number of corrupt files as a fixed value of zero.
 *
 * @return always 0.
 */
@Override
public int getCorruptFilesCount() {
// NOTE(review): constant value; presumably a placeholder in line with the
// "N/A" string returned by getCorruptFiles() in this class - confirm.
return 0;
}
@Override @Override
public int getThreads() { public int getThreads() {
return ManagementFactory.getThreadMXBean().getThreadCount(); return ManagementFactory.getThreadMXBean().getThreadCount();

View File

@ -6350,6 +6350,15 @@ public HAContext getHAContext() {
@Override // NameNodeMXBean @Override // NameNodeMXBean
public String getCorruptFiles() { public String getCorruptFiles() {
return JSON.toString(getCorruptFilesList());
}
/**
 * Reports how many entries the corrupt files list currently holds.
 *
 * @return the size of the list produced by getCorruptFilesList().
 */
@Override // NameNodeMXBean
public int getCorruptFilesCount() {
  final List<String> corruptFiles = getCorruptFilesList();
  return corruptFiles.size();
}
private List<String> getCorruptFilesList() {
List<String> list = new ArrayList<String>(); List<String> list = new ArrayList<String>();
Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks; Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks;
try { try {
@ -6367,7 +6376,7 @@ public String getCorruptFiles() {
} catch (IOException e) { } catch (IOException e) {
LOG.warn("Get corrupt file blocks returned error", e); LOG.warn("Get corrupt file blocks returned error", e);
} }
return JSON.toString(list); return list;
} }
@Override // NameNodeMXBean @Override // NameNodeMXBean

View File

@ -296,6 +296,13 @@ public interface NameNodeMXBean {
*/ */
String getCorruptFiles(); String getCorruptFiles();
/**
 * Get the number of corrupt files, i.e. the length of the list of corrupt
 * files.
 * NOTE(review): presumably this matches the list exposed by
 * getCorruptFiles() - confirm for each implementation.
 *
 * @return the length of the list of corrupt files.
 */
int getCorruptFilesCount();
/** /**
* Get the number of distinct versions of live datanodes. * Get the number of distinct versions of live datanodes.
* *

View File

@ -95,6 +95,7 @@ public void testListCorruptFilesCorruptedBlock() throws Exception {
getNamesystem().listCorruptFileBlocks("/", null); getNamesystem().listCorruptFileBlocks("/", null);
assertEquals("Namenode has " + badFiles.size() assertEquals("Namenode has " + badFiles.size()
+ " corrupt files. Expecting None.", 0, badFiles.size()); + " corrupt files. Expecting None.", 0, badFiles.size());
assertCorruptFilesCount(cluster, badFiles.size());
// Now deliberately corrupt one block // Now deliberately corrupt one block
String bpid = cluster.getNamesystem().getBlockPoolId(); String bpid = cluster.getNamesystem().getBlockPoolId();
@ -128,8 +129,9 @@ public void testListCorruptFilesCorruptedBlock() throws Exception {
// fetch bad file list from namenode. There should be one file. // fetch bad file list from namenode. There should be one file.
badFiles = namenode.getNamesystem().listCorruptFileBlocks("/", null); badFiles = namenode.getNamesystem().listCorruptFileBlocks("/", null);
LOG.info("Namenode has bad files. " + badFiles.size()); LOG.info("Namenode has bad files. " + badFiles.size());
assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting 1.", assertEquals("Namenode has " + badFiles.size() + " bad files. " +
badFiles.size() == 1); "Expecting 1.", 1, badFiles.size());
assertCorruptFilesCount(cluster, badFiles.size());
util.cleanup(fs, "/srcdat10"); util.cleanup(fs, "/srcdat10");
} finally { } finally {
if (cluster != null) { cluster.shutdown(); } if (cluster != null) { cluster.shutdown(); }
@ -176,6 +178,7 @@ public void testListCorruptFileBlocksInSafeMode() throws Exception {
cluster.getNameNode().getNamesystem().listCorruptFileBlocks("/", null); cluster.getNameNode().getNamesystem().listCorruptFileBlocks("/", null);
assertEquals("Namenode has " + badFiles.size() assertEquals("Namenode has " + badFiles.size()
+ " corrupt files. Expecting None.", 0, badFiles.size()); + " corrupt files. Expecting None.", 0, badFiles.size());
assertCorruptFilesCount(cluster, badFiles.size());
// Now deliberately corrupt one block // Now deliberately corrupt one block
File storageDir = cluster.getInstanceStorageDir(0, 0); File storageDir = cluster.getInstanceStorageDir(0, 0);
@ -211,8 +214,9 @@ public void testListCorruptFileBlocksInSafeMode() throws Exception {
badFiles = cluster.getNameNode().getNamesystem(). badFiles = cluster.getNameNode().getNamesystem().
listCorruptFileBlocks("/", null); listCorruptFileBlocks("/", null);
LOG.info("Namenode has bad files. " + badFiles.size()); LOG.info("Namenode has bad files. " + badFiles.size());
assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting 1.", assertEquals("Namenode has " + badFiles.size() + " bad files. " +
badFiles.size() == 1); "Expecting 1.", 1, badFiles.size());
assertCorruptFilesCount(cluster, badFiles.size());
// restart namenode // restart namenode
cluster.restartNameNode(0); cluster.restartNameNode(0);
@ -243,8 +247,9 @@ public void testListCorruptFileBlocksInSafeMode() throws Exception {
badFiles = cluster.getNameNode().getNamesystem(). badFiles = cluster.getNameNode().getNamesystem().
listCorruptFileBlocks("/", null); listCorruptFileBlocks("/", null);
LOG.info("Namenode has bad files. " + badFiles.size()); LOG.info("Namenode has bad files. " + badFiles.size());
assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting 1.", assertEquals("Namenode has " + badFiles.size() + " bad files. " +
badFiles.size() == 1); "Expecting 1.", 1, badFiles.size());
assertCorruptFilesCount(cluster, badFiles.size());
// check that we are still in safe mode // check that we are still in safe mode
assertTrue("Namenode is not in safe mode", assertTrue("Namenode is not in safe mode",
@ -288,7 +293,8 @@ public void testlistCorruptFileBlocks() throws Exception {
Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks = Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks =
namenode.getNamesystem().listCorruptFileBlocks("/corruptData", null); namenode.getNamesystem().listCorruptFileBlocks("/corruptData", null);
int numCorrupt = corruptFileBlocks.size(); int numCorrupt = corruptFileBlocks.size();
assertTrue(numCorrupt == 0); assertEquals(0, numCorrupt);
assertCorruptFilesCount(cluster, numCorrupt);
// delete the blocks // delete the blocks
String bpid = cluster.getNamesystem().getBlockPoolId(); String bpid = cluster.getNamesystem().getBlockPoolId();
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
@ -328,6 +334,7 @@ public void testlistCorruptFileBlocks() throws Exception {
// Validate we get all the corrupt files // Validate we get all the corrupt files
LOG.info("Namenode has bad files. " + numCorrupt); LOG.info("Namenode has bad files. " + numCorrupt);
assertEquals(3, numCorrupt); assertEquals(3, numCorrupt);
assertCorruptFilesCount(cluster, numCorrupt);
// test the paging here // test the paging here
FSNamesystem.CorruptFileBlockInfo[] cfb = corruptFileBlocks FSNamesystem.CorruptFileBlockInfo[] cfb = corruptFileBlocks
@ -400,6 +407,7 @@ public void testlistCorruptFileBlocksDFS() throws Exception {
dfs.listCorruptFileBlocks(new Path("/corruptData")); dfs.listCorruptFileBlocks(new Path("/corruptData"));
int numCorrupt = countPaths(corruptFileBlocks); int numCorrupt = countPaths(corruptFileBlocks);
assertEquals(0, numCorrupt); assertEquals(0, numCorrupt);
assertCorruptFilesCount(cluster, numCorrupt);
// delete the blocks // delete the blocks
String bpid = cluster.getNamesystem().getBlockPoolId(); String bpid = cluster.getNamesystem().getBlockPoolId();
// For loop through number of datadirectories per datanode (2) // For loop through number of datadirectories per datanode (2)
@ -436,6 +444,7 @@ public void testlistCorruptFileBlocksDFS() throws Exception {
// Validate we get all the corrupt files // Validate we get all the corrupt files
LOG.info("Namenode has bad files. " + numCorrupt); LOG.info("Namenode has bad files. " + numCorrupt);
assertEquals(3, numCorrupt); assertEquals(3, numCorrupt);
assertCorruptFilesCount(cluster, numCorrupt);
util.cleanup(fs, "/corruptData"); util.cleanup(fs, "/corruptData");
util.cleanup(fs, "/goodData"); util.cleanup(fs, "/goodData");
@ -477,6 +486,7 @@ public void testMaxCorruptFiles() throws Exception {
assertEquals( assertEquals(
"Namenode has " + badFiles.size() + " corrupt files. Expecting none.", "Namenode has " + badFiles.size() + " corrupt files. Expecting none.",
0, badFiles.size()); 0, badFiles.size());
assertCorruptFilesCount(cluster, badFiles.size());
// Now deliberately blocks from all files // Now deliberately blocks from all files
final String bpid = cluster.getNamesystem().getBlockPoolId(); final String bpid = cluster.getNamesystem().getBlockPoolId();
@ -520,9 +530,9 @@ public void testMaxCorruptFiles() throws Exception {
badFiles = namenode.getNamesystem(). badFiles = namenode.getNamesystem().
listCorruptFileBlocks("/srcdat2", null); listCorruptFileBlocks("/srcdat2", null);
LOG.info("Namenode has bad files. " + badFiles.size()); LOG.info("Namenode has bad files. " + badFiles.size());
assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting " + assertEquals("Namenode has " + badFiles.size() + " bad files. " +
maxCorruptFileBlocks + ".", "Expecting " + maxCorruptFileBlocks + ".", maxCorruptFileBlocks,
badFiles.size() == maxCorruptFileBlocks); badFiles.size());
CorruptFileBlockIterator iter = (CorruptFileBlockIterator) CorruptFileBlockIterator iter = (CorruptFileBlockIterator)
fs.listCorruptFileBlocks(new Path("/srcdat2")); fs.listCorruptFileBlocks(new Path("/srcdat2"));
@ -566,6 +576,7 @@ public void testListCorruptFileBlocksOnRelativePath() throws Exception {
.listCorruptFileBlocks(new Path("corruptData")); .listCorruptFileBlocks(new Path("corruptData"));
int numCorrupt = countPaths(corruptFileBlocks); int numCorrupt = countPaths(corruptFileBlocks);
assertEquals(0, numCorrupt); assertEquals(0, numCorrupt);
assertCorruptFilesCount(cluster, numCorrupt);
// delete the blocks // delete the blocks
String bpid = cluster.getNamesystem().getBlockPoolId(); String bpid = cluster.getNamesystem().getBlockPoolId();
@ -608,4 +619,16 @@ public void testListCorruptFileBlocksOnRelativePath() throws Exception {
} }
} }
} }
/**
 * Verifies that the namesystem of the given cluster's NameNode reports the
 * expected number of corrupt files.
 * @param cluster cluster whose NameNode namesystem is queried
 * @param expectedCorrupt the corrupt files count the namesystem should report
 */
private void assertCorruptFilesCount(MiniDFSCluster cluster,
    int expectedCorrupt) {
  final int actualCorrupt =
      cluster.getNameNode().getNamesystem().getCorruptFilesCount();
  assertEquals("Incorrect number of corrupt files returned", expectedCorrupt,
      actualCorrupt);
}
} }

View File

@ -222,6 +222,11 @@ public void testNameNodeMXBeanInfo() throws Exception {
"CorruptFiles")); "CorruptFiles"));
assertEquals("Bad value for CorruptFiles", fsn.getCorruptFiles(), assertEquals("Bad value for CorruptFiles", fsn.getCorruptFiles(),
corruptFiles); corruptFiles);
// get attribute CorruptFilesCount
int corruptFilesCount = (int) (mbs.getAttribute(mxbeanName,
"CorruptFilesCount"));
assertEquals("Bad value for CorruptFilesCount",
fsn.getCorruptFilesCount(), corruptFilesCount);
// get attribute NameDirStatuses // get attribute NameDirStatuses
String nameDirStatuses = (String) (mbs.getAttribute(mxbeanName, String nameDirStatuses = (String) (mbs.getAttribute(mxbeanName,
"NameDirStatuses")); "NameDirStatuses"));