HDFS-3442. Incorrect count for Missing Replicas in FSCK report. Contributed by Andrew Wang.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1345408 13f79535-47bb-0310-9956-ffa450edef68
Aaron Myers 2012-06-02 00:39:34 +00:00
parent 1cf6010675
commit d1207cbd88
3 changed files with 126 additions and 27 deletions
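
Why the old count was wrong, in miniature: fsck summed the replicas it
actually found (totalReplicas) and reported "Missing replicas" as a
percentage of that, rather than of the replicas the files requested. The
patch adds a numExpectedReplicas counter and divides by it instead. Below is
a minimal standalone sketch, not Hadoop code (the names mirror the fields
the patch touches), using the same numbers as the new test at the bottom of
this commit: 3 blocks at replication factor 2 with only 1 datanode.

// Standalone illustration of the HDFS-3442 fix; not Hadoop code.
public class MissingReplicasExample {
  public static void main(String[] args) {
    final int numBlocks = 3;
    final int replFactor = 2;    // replicas requested per block
    final int liveDatanodes = 1; // replicas actually present per block

    long totalReplicas = numBlocks * liveDatanodes;             // 3 found
    long numExpectedReplicas = numBlocks * replFactor;          // 6 expected
    long missingReplicas = numExpectedReplicas - totalReplicas; // 3 missing

    // Before the fix: percentage relative to replicas found.
    System.out.println((float) (missingReplicas * 100) / (float) totalReplicas + " %");       // 100.0 %
    // After the fix: percentage relative to replicas expected.
    System.out.println((float) (missingReplicas * 100) / (float) numExpectedReplicas + " %"); // 50.0 %
  }
}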

hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -298,6 +298,9 @@ Release 2.0.1-alpha - UNRELEASED
     HDFS-3487. offlineimageviewer should give byte offset information
     when it encounters an exception. (Colin Patrick McCabe via eli)
 
+    HDFS-3442. Incorrect count for Missing Replicas in FSCK report. (Andrew
+    Wang via atm)
+
 Release 2.0.0-alpha - UNRELEASED
 
   INCOMPATIBLE CHANGES

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java

@@ -53,6 +53,8 @@
 import org.apache.hadoop.net.NodeBase;
 import org.apache.hadoop.security.UserGroupInformation;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * This class provides rudimentary checking of DFS volumes for errors and
  * sub-optimal conditions.
@@ -244,7 +246,8 @@ private void listCorruptFileBlocks() throws IOException {
     out.println();
   }
 
-  private void check(String parent, HdfsFileStatus file, Result res) throws IOException {
+  @VisibleForTesting
+  void check(String parent, HdfsFileStatus file, Result res) throws IOException {
     String path = file.getFullName(parent);
     boolean isOpen = false;
 
@@ -313,6 +316,7 @@ private void check(String parent, HdfsFileStatus file, Result res) throws IOExce
       DatanodeInfo[] locs = lBlk.getLocations();
       res.totalReplicas += locs.length;
       short targetFileReplication = file.getReplication();
+      res.numExpectedReplicas += targetFileReplication;
       if (locs.length > targetFileReplication) {
         res.excessiveReplicas += (locs.length - targetFileReplication);
         res.numOverReplicatedBlocks += 1;
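
The single added line above is the heart of the fix: as check() walks each
block of a file, it now also accumulates how many replicas that block was
supposed to have. A simplified sketch of that per-block accounting follows
(hedged: ReplicaTally and tally() are hypothetical stand-ins, not the real
NamenodeFsck API).

// Hypothetical, simplified model of the accounting check() performs.
public class ReplicaTally {
  // found[i] = number of replicas actually located for block i
  static void tally(int[] found, short targetFileReplication) {
    long totalReplicas = 0, numExpectedReplicas = 0, missingReplicas = 0;
    for (int locs : found) {
      totalReplicas += locs;                        // replicas on datanodes
      numExpectedReplicas += targetFileReplication; // the counter this hunk adds
      if (locs < targetFileReplication) {           // under-replicated block
        missingReplicas += targetFileReplication - locs;
      }
    }
    System.out.println(missingReplicas + " of " + numExpectedReplicas
        + " expected replicas missing; " + totalReplicas + " found");
  }

  public static void main(String[] args) {
    // One file, 3 blocks, replication factor 2, one live datanode:
    tally(new int[] {1, 1, 1}, (short) 2); // 3 of 6 expected replicas missing; 3 found
  }
}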
@@ -608,29 +612,31 @@ private void lostFoundInit(DFSClient dfs) {
   /**
    * FsckResult of checking, plus overall DFS statistics.
    */
-  private static class Result {
-    private List<String> missingIds = new ArrayList<String>();
-    private long missingSize = 0L;
-    private long corruptFiles = 0L;
-    private long corruptBlocks = 0L;
-    private long excessiveReplicas = 0L;
-    private long missingReplicas = 0L;
-    private long numOverReplicatedBlocks = 0L;
-    private long numUnderReplicatedBlocks = 0L;
-    private long numMisReplicatedBlocks = 0L; // blocks that do not satisfy block placement policy
-    private long numMinReplicatedBlocks = 0L; // minimally replicatedblocks
-    private long totalBlocks = 0L;
-    private long totalOpenFilesBlocks = 0L;
-    private long totalFiles = 0L;
-    private long totalOpenFiles = 0L;
-    private long totalDirs = 0L;
-    private long totalSize = 0L;
-    private long totalOpenFilesSize = 0L;
-    private long totalReplicas = 0L;
+  @VisibleForTesting
+  static class Result {
+    List<String> missingIds = new ArrayList<String>();
+    long missingSize = 0L;
+    long corruptFiles = 0L;
+    long corruptBlocks = 0L;
+    long excessiveReplicas = 0L;
+    long missingReplicas = 0L;
+    long numOverReplicatedBlocks = 0L;
+    long numUnderReplicatedBlocks = 0L;
+    long numMisReplicatedBlocks = 0L; // blocks that do not satisfy block placement policy
+    long numMinReplicatedBlocks = 0L; // minimally replicatedblocks
+    long totalBlocks = 0L;
+    long numExpectedReplicas = 0L;
+    long totalOpenFilesBlocks = 0L;
+    long totalFiles = 0L;
+    long totalOpenFiles = 0L;
+    long totalDirs = 0L;
+    long totalSize = 0L;
+    long totalOpenFilesSize = 0L;
+    long totalReplicas = 0L;
 
     final short replication;
 
-    private Result(Configuration conf) {
+    Result(Configuration conf) {
       this.replication = (short)conf.getInt(DFSConfigKeys.DFS_REPLICATION_KEY,
           DFSConfigKeys.DFS_REPLICATION_DEFAULT);
     }
@@ -726,7 +732,7 @@ public String toString() {
           missingReplicas);
       if (totalReplicas > 0) {
         res.append(" (").append(
-            ((float) (missingReplicas * 100) / (float) totalReplicas)).append(
+            ((float) (missingReplicas * 100) / (float) numExpectedReplicas)).append(
             " %)");
       }
       return res.toString();
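
This hunk is the other half of the fix: the denominator switches from
replicas found to replicas expected. With the old denominator the figure
could not mean what its label said, and under heavy under-replication it
exceeds 100%. (Note the guard still tests totalReplicas > 0 while the
division now uses numExpectedReplicas; that is how the commit reads.) A tiny
illustration, assuming the same float arithmetic:

// Degenerate case: every block holds 1 of 3 expected replicas.
public class DenominatorExample {
  public static void main(String[] args) {
    long missingReplicas = 2, totalReplicas = 1, numExpectedReplicas = 3;
    System.out.println((float) (missingReplicas * 100) / (float) totalReplicas + " %");       // 200.0 % (old)
    System.out.println((float) (missingReplicas * 100) / (float) numExpectedReplicas + " %"); // ~66.7 % (fixed)
  }
}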

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java

@@ -18,21 +18,27 @@
 package org.apache.hadoop.hdfs.server.namenode;
 
+import static org.junit.Assert.*;
+
 import java.io.BufferedReader;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileReader;
 import java.io.IOException;
 import java.io.PrintStream;
+import java.io.PrintWriter;
 import java.io.RandomAccessFile;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.nio.channels.FileChannel;
 import java.security.PrivilegedExceptionAction;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.Random;
 import java.util.regex.Pattern;
 
-import junit.framework.TestCase;
-
 import org.apache.commons.logging.impl.Log4JLogger;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
@@ -42,25 +48,30 @@
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocol.CorruptFileBlocks;
+import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
+import org.apache.hadoop.hdfs.server.namenode.NamenodeFsck.Result;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.apache.hadoop.hdfs.tools.DFSck;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.net.NetworkTopology;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
 import org.apache.log4j.PatternLayout;
 import org.apache.log4j.RollingFileAppender;
+import org.junit.Test;
 
 /**
  * A JUnit test for doing fsck
  */
-public class TestFsck extends TestCase {
+public class TestFsck {
   static final String auditLogFile = System.getProperty("test.build.dir",
       "build/test") + "/audit.log";
@@ -79,13 +90,15 @@ static String runFsck(Configuration conf, int expectedErrCode,
     PrintStream out = new PrintStream(bStream, true);
     ((Log4JLogger)FSPermissionChecker.LOG).getLogger().setLevel(Level.ALL);
     int errCode = ToolRunner.run(new DFSck(conf, out), path);
-    if (checkErrorCode)
+    if (checkErrorCode) {
       assertEquals(expectedErrCode, errCode);
+    }
     ((Log4JLogger)FSPermissionChecker.LOG).getLogger().setLevel(Level.INFO);
     return bStream.toString();
   }
 
   /** do fsck */
+  @Test
   public void testFsck() throws Exception {
     DFSTestUtil util = new DFSTestUtil("TestFsck", 20, 3, 8*1024);
     MiniDFSCluster cluster = null;
@@ -158,6 +171,7 @@ private void verifyAuditLogs() throws IOException {
     assertNull("Unexpected event in audit log", reader.readLine());
   }
 
+  @Test
   public void testFsckNonExistent() throws Exception {
     DFSTestUtil util = new DFSTestUtil("TestFsck", 20, 3, 8*1024);
     MiniDFSCluster cluster = null;
@@ -180,6 +194,7 @@ public void testFsckNonExistent() throws Exception {
   }
 
   /** Test fsck with permission set on inodes */
+  @Test
   public void testFsckPermission() throws Exception {
     final DFSTestUtil util = new DFSTestUtil(getClass().getSimpleName(), 20, 3, 8*1024);
     final Configuration conf = new HdfsConfiguration();
@@ -227,6 +242,7 @@ public Object run() throws Exception {
     }
   }
 
+  @Test
   public void testFsckMoveAndDelete() throws Exception {
     final int MAX_MOVE_TRIES = 5;
     DFSTestUtil util = new DFSTestUtil("TestFsck", 5, 3, 8*1024);
@@ -300,6 +316,7 @@ public void testFsckMoveAndDelete() throws Exception {
     }
   }
 
+  @Test
   public void testFsckOpenFiles() throws Exception {
     DFSTestUtil util = new DFSTestUtil("TestFsck", 4, 3, 8*1024);
     MiniDFSCluster cluster = null;
@@ -350,6 +367,7 @@ public void testFsckOpenFiles() throws Exception {
     }
   }
 
+  @Test
   public void testCorruptBlock() throws Exception {
     Configuration conf = new HdfsConfiguration();
     conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
@@ -426,6 +444,7 @@ public void testCorruptBlock() throws Exception {
    *
    * @throws Exception
    */
+  @Test
   public void testFsckError() throws Exception {
     MiniDFSCluster cluster = null;
     try {
@@ -460,6 +479,7 @@ public void testFsckError() throws Exception {
   }
 
   /** check if option -list-corruptfiles of fsck command works properly */
+  @Test
   public void testFsckListCorruptFilesBlocks() throws Exception {
     Configuration conf = new Configuration();
     conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
@@ -529,6 +549,7 @@ public void testFsckListCorruptFilesBlocks() throws Exception {
    * Test for checking fsck command on illegal arguments should print the proper
    * usage.
    */
+  @Test
   public void testToCheckTheFsckCommandOnIllegalArguments() throws Exception {
     MiniDFSCluster cluster = null;
     try {
@@ -560,4 +581,73 @@ public void testToCheckTheFsckCommandOnIllegalArguments() throws Exception {
       }
     }
   }
+
+  /**
+   * Tests that the # of missing block replicas and expected replicas is correct
+   * @throws IOException
+   */
+  @Test
+  public void testFsckMissingReplicas() throws IOException {
+    // Desired replication factor
+    // Set this higher than NUM_REPLICAS so it's under-replicated
+    final short REPL_FACTOR = 2;
+    // Number of replicas to actually start
+    final short NUM_REPLICAS = 1;
+    // Number of blocks to write
+    final short NUM_BLOCKS = 3;
+    // Set a small-ish blocksize
+    final long blockSize = 512;
+
+    Configuration conf = new Configuration();
+    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
+
+    MiniDFSCluster cluster = null;
+    DistributedFileSystem dfs = null;
+
+    try {
+      // Startup a minicluster
+      cluster =
+          new MiniDFSCluster.Builder(conf).numDataNodes(NUM_REPLICAS).build();
+      assertNotNull("Failed Cluster Creation", cluster);
+      cluster.waitClusterUp();
+      dfs = (DistributedFileSystem) cluster.getFileSystem();
+      assertNotNull("Failed to get FileSystem", dfs);
+
+      // Create a file that will be intentionally under-replicated
+      final String pathString = new String("/testfile");
+      final Path path = new Path(pathString);
+      long fileLen = blockSize * NUM_BLOCKS;
+      DFSTestUtil.createFile(dfs, path, fileLen, REPL_FACTOR, 1);
+
+      // Create an under-replicated file
+      NameNode namenode = cluster.getNameNode();
+      NetworkTopology nettop = cluster.getNamesystem().getBlockManager()
+          .getDatanodeManager().getNetworkTopology();
+      Map<String,String[]> pmap = new HashMap<String, String[]>();
+      Writer result = new StringWriter();
+      PrintWriter out = new PrintWriter(result, true);
+      InetAddress remoteAddress = InetAddress.getLocalHost();
+      NamenodeFsck fsck = new NamenodeFsck(conf, namenode, nettop, pmap, out,
+          NUM_REPLICAS, (short)1, remoteAddress);
+
+      // Run the fsck and check the Result
+      final HdfsFileStatus file =
+          namenode.getRpcServer().getFileInfo(pathString);
+      assertNotNull(file);
+      Result res = new Result(conf);
+      fsck.check(pathString, file, res);
+      // Also print the output from the fsck, for ex post facto sanity checks
+      System.out.println(result.toString());
+      assertEquals(res.missingReplicas,
+          (NUM_BLOCKS*REPL_FACTOR) - (NUM_BLOCKS*NUM_REPLICAS));
+      assertEquals(res.numExpectedReplicas, NUM_BLOCKS*REPL_FACTOR);
+    } finally {
+      if(dfs != null) {
+        dfs.close();
+      }
+      if(cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
 }
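
Design note: rather than scraping fsck's text report, the new test drives
NamenodeFsck directly. That is why the patch relaxes check() and Result from
private to package-private and marks them @VisibleForTesting: the assertions
can then compare the raw counters (res.missingReplicas,
res.numExpectedReplicas) against exact values instead of pattern-matching
the printed output.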