HDFS-7281. Missing block is marked as corrupted block (Ming Ma via Yongjun Zhang)

This commit is contained in:
Yongjun Zhang 2015-05-01 08:42:00 -07:00
parent 1b3b9e5c31
commit 279958b772
4 changed files with 63 additions and 20 deletions

View File

@ -20,6 +20,9 @@ Trunk (Unreleased)
HDFS-7985. WebHDFS should be always enabled. (Li Lu via wheat9) HDFS-7985. WebHDFS should be always enabled. (Li Lu via wheat9)
HDFS-7281. Missing block is marked as corrupted block (Ming Ma via
Yongjun Zhang)
NEW FEATURES NEW FEATURES
HDFS-3125. Add JournalService to enable Journal Daemon. (suresh) HDFS-3125. Add JournalService to enable Journal Daemon. (suresh)

View File

@ -849,7 +849,8 @@ private LocatedBlock createLocatedBlock(final BlockInfoContiguous blk, final lon
} }
final int numNodes = blocksMap.numNodes(blk); final int numNodes = blocksMap.numNodes(blk);
final boolean isCorrupt = numCorruptNodes == numNodes; final boolean isCorrupt = numCorruptNodes != 0 &&
numCorruptNodes == numNodes;
final int numMachines = isCorrupt ? numNodes: numNodes - numCorruptNodes; final int numMachines = isCorrupt ? numNodes: numNodes - numCorruptNodes;
final DatanodeStorageInfo[] machines = new DatanodeStorageInfo[numMachines]; final DatanodeStorageInfo[] machines = new DatanodeStorageInfo[numMachines];
int j = 0; int j = 0;

View File

@ -531,6 +531,7 @@ private void collectBlocksSummary(String parent, HdfsFileStatus file, Result res
int missing = 0; int missing = 0;
int corrupt = 0; int corrupt = 0;
long missize = 0; long missize = 0;
long corruptSize = 0;
int underReplicatedPerFile = 0; int underReplicatedPerFile = 0;
int misReplicatedPerFile = 0; int misReplicatedPerFile = 0;
StringBuilder report = new StringBuilder(); StringBuilder report = new StringBuilder();
@ -570,10 +571,11 @@ private void collectBlocksSummary(String parent, HdfsFileStatus file, Result res
// count corrupt blocks // count corrupt blocks
boolean isCorrupt = lBlk.isCorrupt(); boolean isCorrupt = lBlk.isCorrupt();
if (isCorrupt) { if (isCorrupt) {
res.addCorrupt(block.getNumBytes());
corrupt++; corrupt++;
res.corruptBlocks++; corruptSize += block.getNumBytes();
out.print("\n" + path + ": CORRUPT blockpool " + block.getBlockPoolId() + out.print("\n" + path + ": CORRUPT blockpool " +
" block " + block.getBlockName()+"\n"); block.getBlockPoolId() + " block " + block.getBlockName() + "\n");
} }
// count minimally replicated blocks // count minimally replicated blocks
@ -619,7 +621,11 @@ private void collectBlocksSummary(String parent, HdfsFileStatus file, Result res
// report // report
String blkName = block.toString(); String blkName = block.toString();
report.append(blockNumber + ". " + blkName + " len=" + block.getNumBytes()); report.append(blockNumber + ". " + blkName + " len=" + block.getNumBytes());
if (totalReplicasPerBlock == 0) { if (totalReplicasPerBlock == 0 && !isCorrupt) {
// If the block is corrupted, it means all its available replicas are
// corrupted. We don't mark it as missing given these available replicas
// might still be accessible as the block might be incorrectly marked as
// corrupted by client machines.
report.append(" MISSING!"); report.append(" MISSING!");
res.addMissing(block.toString(), block.getNumBytes()); res.addMissing(block.toString(), block.getNumBytes());
missing++; missing++;
@ -674,9 +680,15 @@ private void collectBlocksSummary(String parent, HdfsFileStatus file, Result res
// count corrupt file & move or delete if necessary // count corrupt file & move or delete if necessary
if ((missing > 0) || (corrupt > 0)) { if ((missing > 0) || (corrupt > 0)) {
if (!showFiles && (missing > 0)) { if (!showFiles) {
out.print("\n" + path + ": MISSING " + missing if (missing > 0) {
+ " blocks of total size " + missize + " B."); out.print("\n" + path + ": MISSING " + missing
+ " blocks of total size " + missize + " B.");
}
if (corrupt > 0) {
out.print("\n" + path + ": CORRUPT " + corrupt
+ " blocks of total size " + corruptSize + " B.");
}
} }
res.corruptFiles++; res.corruptFiles++;
if (isOpen) { if (isOpen) {
@ -688,9 +700,16 @@ private void collectBlocksSummary(String parent, HdfsFileStatus file, Result res
} }
if (showFiles) { if (showFiles) {
if (missing > 0) { if (missing > 0 || corrupt > 0) {
out.print(" MISSING " + missing + " blocks of total size " + missize + " B\n"); if (missing > 0) {
} else if (underReplicatedPerFile == 0 && misReplicatedPerFile == 0) { out.print(" MISSING " + missing + " blocks of total size " +
missize + " B\n");
}
if (corrupt > 0) {
out.print(" CORRUPT " + corrupt + " blocks of total size " +
corruptSize + " B\n");
}
} else if (underReplicatedPerFile == 0 && misReplicatedPerFile == 0) {
out.print(" OK\n"); out.print(" OK\n");
} }
if (showBlocks) { if (showBlocks) {
@ -956,6 +975,7 @@ static class Result {
long missingSize = 0L; long missingSize = 0L;
long corruptFiles = 0L; long corruptFiles = 0L;
long corruptBlocks = 0L; long corruptBlocks = 0L;
long corruptSize = 0L;
long excessiveReplicas = 0L; long excessiveReplicas = 0L;
long missingReplicas = 0L; long missingReplicas = 0L;
long decommissionedReplicas = 0L; long decommissionedReplicas = 0L;
@ -999,6 +1019,12 @@ void addMissing(String id, long size) {
missingSize += size; missingSize += size;
} }
/**
 * Records one corrupt block in this result.
 *
 * Bumps the corrupt-block counter by one and adds the block's size to the
 * running corrupt-size total.
 *
 * @param size size of the corrupt block, in bytes
 */
void addCorrupt(long size) {
  corruptSize = corruptSize + size;
  corruptBlocks += 1;
}
/** Return the actual replication factor. */ /** Return the actual replication factor. */
float getReplicationFactor() { float getReplicationFactor() {
if (totalBlocks == 0) if (totalBlocks == 0)
@ -1051,7 +1077,8 @@ public String toString() {
"\n MISSING SIZE:\t\t").append(missingSize).append(" B"); "\n MISSING SIZE:\t\t").append(missingSize).append(" B");
} }
if (corruptBlocks > 0) { if (corruptBlocks > 0) {
res.append("\n CORRUPT BLOCKS: \t").append(corruptBlocks); res.append("\n CORRUPT BLOCKS: \t").append(corruptBlocks).append(
"\n CORRUPT SIZE:\t\t").append(corruptSize).append(" B");
} }
} }
res.append("\n ********************************"); res.append("\n ********************************");
@ -1086,7 +1113,8 @@ public String toString() {
} }
res.append("\n Default replication factor:\t").append(replication) res.append("\n Default replication factor:\t").append(replication)
.append("\n Average block replication:\t").append( .append("\n Average block replication:\t").append(
getReplicationFactor()).append("\n Corrupt blocks:\t\t").append( getReplicationFactor()).append("\n Missing blocks:\t\t").append(
missingIds.size()).append("\n Corrupt blocks:\t\t").append(
corruptBlocks).append("\n Missing replicas:\t\t").append( corruptBlocks).append("\n Missing replicas:\t\t").append(
missingReplicas); missingReplicas);
if (totalReplicas > 0) { if (totalReplicas > 0) {

View File

@ -121,6 +121,9 @@ public class TestFsck {
"cmd=getfileinfo\\ssrc=\\/\\sdst=null\\s" + "cmd=getfileinfo\\ssrc=\\/\\sdst=null\\s" +
"perm=null\\s" + "proto=.*"); "perm=null\\s" + "proto=.*");
/**
 * Matches the fsck output line that contains "Missing blocks:" followed by
 * two tabs. Group 1 captures the (possibly empty) run of decimal digits that
 * follows, i.e. the reported missing-block count as text.
 */
static final Pattern numMissingBlocksPattern = Pattern.compile(
".*Missing blocks:\t\t([0123456789]*).*");
static final Pattern numCorruptBlocksPattern = Pattern.compile( static final Pattern numCorruptBlocksPattern = Pattern.compile(
".*Corrupt blocks:\t\t([0123456789]*).*"); ".*Corrupt blocks:\t\t([0123456789]*).*");
@ -360,19 +363,27 @@ public void testFsckMove() throws Exception {
// Wait for fsck to discover all the missing blocks // Wait for fsck to discover all the missing blocks
while (true) { while (true) {
outStr = runFsck(conf, 1, false, "/"); outStr = runFsck(conf, 1, false, "/");
String numMissing = null;
String numCorrupt = null; String numCorrupt = null;
for (String line : outStr.split(LINE_SEPARATOR)) { for (String line : outStr.split(LINE_SEPARATOR)) {
Matcher m = numCorruptBlocksPattern.matcher(line); Matcher m = numMissingBlocksPattern.matcher(line);
if (m.matches()) {
numMissing = m.group(1);
}
m = numCorruptBlocksPattern.matcher(line);
if (m.matches()) { if (m.matches()) {
numCorrupt = m.group(1); numCorrupt = m.group(1);
}
if (numMissing != null && numCorrupt != null) {
break; break;
} }
} }
if (numCorrupt == null) { if (numMissing == null || numCorrupt == null) {
throw new IOException("failed to find number of corrupt " + throw new IOException("failed to find number of missing or corrupt" +
"blocks in fsck output."); " blocks in fsck output.");
} }
if (numCorrupt.equals(Integer.toString(totalMissingBlocks))) { if (numMissing.equals(Integer.toString(totalMissingBlocks))) {
assertTrue(numCorrupt.equals(Integer.toString(0)));
assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS)); assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
break; break;
} }