HDFS-7537. Add "UNDER MIN REPL'D BLOCKS" count to fsck. Contributed by GAO Rui
This commit is contained in:
parent
8c4f76aa20
commit
725cc499f0
@ -673,6 +673,9 @@ Release 2.7.0 - UNRELEASED
|
||||
HDFS-7495. Remove updatePosition argument from DFSInputStream#getBlockAt()
|
||||
(cmccabe)
|
||||
|
||||
HDFS-7537. Add "UNDER MIN REPL'D BLOCKS" count to fsck. (GAO Rui via
|
||||
szetszwo)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
HDFS-7454. Reduce memory footprint for AclEntries in NameNode.
|
||||
|
@ -512,6 +512,9 @@ void check(String parent, HdfsFileStatus file, Result res) throws IOException {
|
||||
res.totalReplicas += liveReplicas;
|
||||
short targetFileReplication = file.getReplication();
|
||||
res.numExpectedReplicas += targetFileReplication;
|
||||
if(liveReplicas<minReplication){
|
||||
res.numUnderMinReplicatedBlocks++;
|
||||
}
|
||||
if (liveReplicas > targetFileReplication) {
|
||||
res.excessiveReplicas += (liveReplicas - targetFileReplication);
|
||||
res.numOverReplicatedBlocks += 1;
|
||||
@ -858,6 +861,7 @@ static class Result {
|
||||
long corruptBlocks = 0L;
|
||||
long excessiveReplicas = 0L;
|
||||
long missingReplicas = 0L;
|
||||
long numUnderMinReplicatedBlocks=0L;
|
||||
long numOverReplicatedBlocks = 0L;
|
||||
long numUnderReplicatedBlocks = 0L;
|
||||
long numMisReplicatedBlocks = 0L; // blocks that do not satisfy block placement policy
|
||||
@ -874,10 +878,13 @@ static class Result {
|
||||
long totalReplicas = 0L;
|
||||
|
||||
final short replication;
|
||||
final int minReplication;
|
||||
|
||||
/**
 * Creates an empty result whose replication settings are read from the
 * given configuration.
 *
 * @param conf configuration queried for
 *             {@code DFSConfigKeys.DFS_REPLICATION_KEY} and
 *             {@code DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY}; the
 *             matching {@code *_DEFAULT} constants are used as fallbacks
 */
Result(Configuration conf) {
  this.replication = (short) conf.getInt(DFSConfigKeys.DFS_REPLICATION_KEY,
      DFSConfigKeys.DFS_REPLICATION_DEFAULT);
  // minReplication is declared as an int, so store the configured value
  // directly instead of narrowing it through a short first.
  this.minReplication = conf.getInt(
      DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY,
      DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_DEFAULT);
}
|
||||
|
||||
/**
|
||||
@ -925,15 +932,28 @@ public String toString() {
|
||||
res.append(" (Total open file blocks (not validated): ").append(
|
||||
totalOpenFilesBlocks).append(")");
|
||||
}
|
||||
if (corruptFiles > 0) {
|
||||
res.append("\n ********************************").append(
|
||||
"\n CORRUPT FILES:\t").append(corruptFiles);
|
||||
if (missingSize > 0) {
|
||||
res.append("\n MISSING BLOCKS:\t").append(missingIds.size()).append(
|
||||
"\n MISSING SIZE:\t\t").append(missingSize).append(" B");
|
||||
if (corruptFiles > 0 || numUnderMinReplicatedBlocks>0) {
|
||||
res.append("\n ********************************");
|
||||
if(numUnderMinReplicatedBlocks>0){
|
||||
res.append("\n UNDER MIN REPL'D BLOCKS:\t").append(numUnderMinReplicatedBlocks);
|
||||
if(totalBlocks>0){
|
||||
res.append(" (").append(
|
||||
((float) (numUnderMinReplicatedBlocks * 100) / (float) totalBlocks))
|
||||
.append(" %)");
|
||||
}
|
||||
res.append("\n ").append("DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY:\t")
|
||||
.append(minReplication);
|
||||
}
|
||||
if (corruptBlocks > 0) {
|
||||
res.append("\n CORRUPT BLOCKS: \t").append(corruptBlocks);
|
||||
if(corruptFiles>0) {
|
||||
res.append(
|
||||
"\n CORRUPT FILES:\t").append(corruptFiles);
|
||||
if (missingSize > 0) {
|
||||
res.append("\n MISSING BLOCKS:\t").append(missingIds.size()).append(
|
||||
"\n MISSING SIZE:\t\t").append(missingSize).append(" B");
|
||||
}
|
||||
if (corruptBlocks > 0) {
|
||||
res.append("\n CORRUPT BLOCKS: \t").append(corruptBlocks);
|
||||
}
|
||||
}
|
||||
res.append("\n ********************************");
|
||||
}
|
||||
|
@ -693,7 +693,86 @@ public void testCorruptBlock() throws Exception {
|
||||
if (cluster != null) {cluster.shutdown();}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testUnderMinReplicatedBlock() throws Exception {
|
||||
Configuration conf = new HdfsConfiguration();
|
||||
conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
|
||||
// Set short retry timeouts so this test runs faster
|
||||
conf.setInt(DFSConfigKeys.DFS_CLIENT_RETRY_WINDOW_BASE, 10);
|
||||
// Set minReplication to 2
|
||||
short minReplication=2;
|
||||
conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY,minReplication);
|
||||
FileSystem fs = null;
|
||||
DFSClient dfsClient = null;
|
||||
LocatedBlocks blocks = null;
|
||||
int replicaCount = 0;
|
||||
Random random = new Random();
|
||||
String outStr = null;
|
||||
short factor = 1;
|
||||
MiniDFSCluster cluster = null;
|
||||
try {
|
||||
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
|
||||
cluster.waitActive();
|
||||
fs = cluster.getFileSystem();
|
||||
Path file1 = new Path("/testUnderMinReplicatedBlock");
|
||||
DFSTestUtil.createFile(fs, file1, 1024, minReplication, 0);
|
||||
// Wait until file replication has completed
|
||||
DFSTestUtil.waitReplication(fs, file1, minReplication);
|
||||
ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, file1);
|
||||
|
||||
// Make sure filesystem is in healthy state
|
||||
outStr = runFsck(conf, 0, true, "/");
|
||||
System.out.println(outStr);
|
||||
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
|
||||
|
||||
// corrupt the first replica
|
||||
File blockFile = cluster.getBlockFile(0, block);
|
||||
if (blockFile != null && blockFile.exists()) {
|
||||
RandomAccessFile raFile = new RandomAccessFile(blockFile, "rw");
|
||||
FileChannel channel = raFile.getChannel();
|
||||
String badString = "BADBAD";
|
||||
int rand = random.nextInt((int) channel.size()/2);
|
||||
raFile.seek(rand);
|
||||
raFile.write(badString.getBytes());
|
||||
raFile.close();
|
||||
}
|
||||
|
||||
dfsClient = new DFSClient(new InetSocketAddress("localhost",
|
||||
cluster.getNameNodePort()), conf);
|
||||
blocks = dfsClient.getNamenode().
|
||||
getBlockLocations(file1.toString(), 0, Long.MAX_VALUE);
|
||||
replicaCount = blocks.get(0).getLocations().length;
|
||||
while (replicaCount != factor) {
|
||||
try {
|
||||
Thread.sleep(100);
|
||||
// Read the file to trigger reportBadBlocks
|
||||
try {
|
||||
IOUtils.copyBytes(fs.open(file1), new IOUtils.NullOutputStream(), conf,
|
||||
true);
|
||||
} catch (IOException ie) {
|
||||
// Ignore exception
|
||||
}
|
||||
System.out.println("sleep in try: replicaCount="+replicaCount+" factor="+factor);
|
||||
} catch (InterruptedException ignore) {
|
||||
}
|
||||
blocks = dfsClient.getNamenode().
|
||||
getBlockLocations(file1.toString(), 0, Long.MAX_VALUE);
|
||||
replicaCount = blocks.get(0).getLocations().length;
|
||||
}
|
||||
|
||||
// Check if fsck reports the same
|
||||
outStr = runFsck(conf, 0, true, "/");
|
||||
System.out.println(outStr);
|
||||
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
|
||||
assertTrue(outStr.contains("UNDER MIN REPL'D BLOCKS:\t1 (100.0 %)"));
|
||||
assertTrue(outStr.contains("DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY:\t2"));
|
||||
} finally {
|
||||
if (cluster != null) {cluster.shutdown();}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Test if fsck can return -1 in case of failure
|
||||
*
|
||||
* @throws Exception
|
||||
|
Loading…
Reference in New Issue
Block a user