HDFS-3442. Incorrect count for Missing Replicas in FSCK report. Contributed by Andrew Wang.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1345408 13f79535-47bb-0310-9956-ffa450edef68
commit d1207cbd88
parent 1cf6010675
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -298,6 +298,9 @@ Release 2.0.1-alpha - UNRELEASED
     HDFS-3487. offlineimageviewer should give byte offset information
     when it encounters an exception. (Colin Patrick McCabe via eli)
 
+    HDFS-3442. Incorrect count for Missing Replicas in FSCK report. (Andrew
+    Wang via atm)
+
 Release 2.0.0-alpha - UNRELEASED
 
   INCOMPATIBLE CHANGES
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
@@ -53,6 +53,8 @@
 import org.apache.hadoop.net.NodeBase;
 import org.apache.hadoop.security.UserGroupInformation;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * This class provides rudimentary checking of DFS volumes for errors and
  * sub-optimal conditions.
@@ -244,7 +246,8 @@ private void listCorruptFileBlocks() throws IOException {
     out.println();
   }
 
-  private void check(String parent, HdfsFileStatus file, Result res) throws IOException {
+  @VisibleForTesting
+  void check(String parent, HdfsFileStatus file, Result res) throws IOException {
     String path = file.getFullName(parent);
     boolean isOpen = false;
 
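The hunk above, and the Result hunk further down, apply the same visibility pattern: a member is relaxed from private to package-private and tagged with Guava's @VisibleForTesting so that a test in the same package can reach it, while readers are warned it is not public API. A minimal sketch of the pattern, with illustrative names that are not from this commit:

import com.google.common.annotations.VisibleForTesting;

public class Checker {
  // Package-private instead of private: tests in the same package can call
  // this directly. The annotation is advisory documentation only; it changes
  // nothing at compile time or runtime.
  @VisibleForTesting
  void check(String path) {
    // ... validation logic ...
  }
}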
@@ -313,6 +316,7 @@ private void check(String parent, HdfsFileStatus file, Result res) throws IOException {
       DatanodeInfo[] locs = lBlk.getLocations();
       res.totalReplicas += locs.length;
       short targetFileReplication = file.getReplication();
+      res.numExpectedReplicas += targetFileReplication;
       if (locs.length > targetFileReplication) {
         res.excessiveReplicas += (locs.length - targetFileReplication);
         res.numOverReplicatedBlocks += 1;
@@ -608,29 +612,31 @@ private void lostFoundInit(DFSClient dfs) {
   /**
    * FsckResult of checking, plus overall DFS statistics.
    */
-  private static class Result {
-    private List<String> missingIds = new ArrayList<String>();
-    private long missingSize = 0L;
-    private long corruptFiles = 0L;
-    private long corruptBlocks = 0L;
-    private long excessiveReplicas = 0L;
-    private long missingReplicas = 0L;
-    private long numOverReplicatedBlocks = 0L;
-    private long numUnderReplicatedBlocks = 0L;
-    private long numMisReplicatedBlocks = 0L; // blocks that do not satisfy block placement policy
-    private long numMinReplicatedBlocks = 0L; // minimally replicatedblocks
-    private long totalBlocks = 0L;
-    private long totalOpenFilesBlocks = 0L;
-    private long totalFiles = 0L;
-    private long totalOpenFiles = 0L;
-    private long totalDirs = 0L;
-    private long totalSize = 0L;
-    private long totalOpenFilesSize = 0L;
-    private long totalReplicas = 0L;
+  @VisibleForTesting
+  static class Result {
+    List<String> missingIds = new ArrayList<String>();
+    long missingSize = 0L;
+    long corruptFiles = 0L;
+    long corruptBlocks = 0L;
+    long excessiveReplicas = 0L;
+    long missingReplicas = 0L;
+    long numOverReplicatedBlocks = 0L;
+    long numUnderReplicatedBlocks = 0L;
+    long numMisReplicatedBlocks = 0L; // blocks that do not satisfy block placement policy
+    long numMinReplicatedBlocks = 0L; // minimally replicatedblocks
+    long totalBlocks = 0L;
+    long numExpectedReplicas = 0L;
+    long totalOpenFilesBlocks = 0L;
+    long totalFiles = 0L;
+    long totalOpenFiles = 0L;
+    long totalDirs = 0L;
+    long totalSize = 0L;
+    long totalOpenFilesSize = 0L;
+    long totalReplicas = 0L;
 
     final short replication;
 
-    private Result(Configuration conf) {
+    Result(Configuration conf) {
       this.replication = (short)conf.getInt(DFSConfigKeys.DFS_REPLICATION_KEY,
           DFSConfigKeys.DFS_REPLICATION_DEFAULT);
     }
@@ -726,7 +732,7 @@ public String toString() {
           missingReplicas);
       if (totalReplicas > 0) {
         res.append(" (").append(
-            ((float) (missingReplicas * 100) / (float) totalReplicas)).append(
+            ((float) (missingReplicas * 100) / (float) numExpectedReplicas)).append(
             " %)");
       }
       return res.toString();
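The one-line change above is the heart of HDFS-3442: the missing-replica percentage was previously computed against the replicas actually found (totalReplicas) rather than the replicas that should exist (numExpectedReplicas). A self-contained sketch of the arithmetic, using the same numbers as the new testFsckMissingReplicas further down (3 blocks at replication factor 2, with only 1 live replica each):

public class MissingReplicaPercent {
  public static void main(String[] args) {
    // 3 blocks, each expected to hold 2 replicas but holding only 1.
    long missingReplicas = 3;     // 3 * (2 - 1)
    long totalReplicas = 3;       // replicas actually found: 3 * 1
    long numExpectedReplicas = 6; // replicas that should exist: 3 * 2

    // Before the fix: missing divided by found reports 100.0 %
    float before = (float) (missingReplicas * 100) / (float) totalReplicas;
    // After the fix: missing divided by expected reports 50.0 %
    float after = (float) (missingReplicas * 100) / (float) numExpectedReplicas;

    System.out.println(before + " % -> " + after + " %"); // 100.0 % -> 50.0 %
  }
}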
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java
@@ -18,21 +18,27 @@
 
 package org.apache.hadoop.hdfs.server.namenode;
 
+import static org.junit.Assert.*;
+
 import java.io.BufferedReader;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileReader;
 import java.io.IOException;
 import java.io.PrintStream;
+import java.io.PrintWriter;
 import java.io.RandomAccessFile;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.nio.channels.FileChannel;
 import java.security.PrivilegedExceptionAction;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.Random;
 import java.util.regex.Pattern;
 
-import junit.framework.TestCase;
-
 import org.apache.commons.logging.impl.Log4JLogger;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
@@ -42,25 +48,30 @@
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocol.CorruptFileBlocks;
+import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
+import org.apache.hadoop.hdfs.server.namenode.NamenodeFsck.Result;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.apache.hadoop.hdfs.tools.DFSck;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.net.NetworkTopology;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
 import org.apache.log4j.PatternLayout;
 import org.apache.log4j.RollingFileAppender;
+import org.junit.Test;
 
 /**
  * A JUnit test for doing fsck
  */
-public class TestFsck extends TestCase {
+public class TestFsck {
   static final String auditLogFile = System.getProperty("test.build.dir",
       "build/test") + "/audit.log";
 
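Dropping extends TestCase here moves TestFsck from JUnit 3 to JUnit 4, which is why the hunks below add a @Test annotation to each test method: JUnit 4 discovers tests by annotation rather than by the test* naming convention. A minimal sketch of driving the migrated class programmatically, assuming JUnit 4 on the classpath (the runner class itself is hypothetical, not part of this commit):

import org.junit.runner.JUnitCore;
import org.junit.runner.Result;

public class RunTestFsck {
  public static void main(String[] args) throws ClassNotFoundException {
    // JUnitCore runs every @Test-annotated method in the given class.
    Result result = JUnitCore.runClasses(
        Class.forName("org.apache.hadoop.hdfs.server.namenode.TestFsck"));
    System.out.println("Run: " + result.getRunCount()
        + ", failed: " + result.getFailureCount());
  }
}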
@@ -79,13 +90,15 @@ static String runFsck(Configuration conf, int expectedErrCode,
     PrintStream out = new PrintStream(bStream, true);
     ((Log4JLogger)FSPermissionChecker.LOG).getLogger().setLevel(Level.ALL);
     int errCode = ToolRunner.run(new DFSck(conf, out), path);
-    if (checkErrorCode)
+    if (checkErrorCode) {
       assertEquals(expectedErrCode, errCode);
+    }
     ((Log4JLogger)FSPermissionChecker.LOG).getLogger().setLevel(Level.INFO);
     return bStream.toString();
   }
 
   /** do fsck */
+  @Test
   public void testFsck() throws Exception {
     DFSTestUtil util = new DFSTestUtil("TestFsck", 20, 3, 8*1024);
     MiniDFSCluster cluster = null;
@@ -158,6 +171,7 @@ private void verifyAuditLogs() throws IOException {
     assertNull("Unexpected event in audit log", reader.readLine());
   }
 
+  @Test
   public void testFsckNonExistent() throws Exception {
     DFSTestUtil util = new DFSTestUtil("TestFsck", 20, 3, 8*1024);
     MiniDFSCluster cluster = null;
@@ -180,6 +194,7 @@ public void testFsckNonExistent() throws Exception {
   }
 
   /** Test fsck with permission set on inodes */
+  @Test
   public void testFsckPermission() throws Exception {
     final DFSTestUtil util = new DFSTestUtil(getClass().getSimpleName(), 20, 3, 8*1024);
     final Configuration conf = new HdfsConfiguration();
@@ -227,6 +242,7 @@ public Object run() throws Exception {
     }
   }
 
+  @Test
   public void testFsckMoveAndDelete() throws Exception {
     final int MAX_MOVE_TRIES = 5;
     DFSTestUtil util = new DFSTestUtil("TestFsck", 5, 3, 8*1024);
@@ -300,6 +316,7 @@ public void testFsckMoveAndDelete() throws Exception {
     }
   }
 
+  @Test
   public void testFsckOpenFiles() throws Exception {
     DFSTestUtil util = new DFSTestUtil("TestFsck", 4, 3, 8*1024);
     MiniDFSCluster cluster = null;
@@ -350,6 +367,7 @@ public void testFsckOpenFiles() throws Exception {
     }
   }
 
+  @Test
   public void testCorruptBlock() throws Exception {
     Configuration conf = new HdfsConfiguration();
     conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
@@ -426,6 +444,7 @@ public void testCorruptBlock() throws Exception {
    *
    * @throws Exception
    */
+  @Test
   public void testFsckError() throws Exception {
     MiniDFSCluster cluster = null;
     try {
@@ -460,6 +479,7 @@ public void testFsckError() throws Exception {
   }
 
   /** check if option -list-corruptfiles of fsck command works properly */
+  @Test
   public void testFsckListCorruptFilesBlocks() throws Exception {
     Configuration conf = new Configuration();
     conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
@@ -529,6 +549,7 @@ public void testFsckListCorruptFilesBlocks() throws Exception {
    * Test for checking fsck command on illegal arguments should print the proper
    * usage.
    */
+  @Test
   public void testToCheckTheFsckCommandOnIllegalArguments() throws Exception {
     MiniDFSCluster cluster = null;
     try {
@@ -560,4 +581,73 @@ public void testToCheckTheFsckCommandOnIllegalArguments() throws Exception {
       }
     }
   }
+
+  /**
+   * Tests that the # of missing block replicas and expected replicas is correct
+   * @throws IOException
+   */
+  @Test
+  public void testFsckMissingReplicas() throws IOException {
+    // Desired replication factor
+    // Set this higher than NUM_REPLICAS so it's under-replicated
+    final short REPL_FACTOR = 2;
+    // Number of replicas to actually start
+    final short NUM_REPLICAS = 1;
+    // Number of blocks to write
+    final short NUM_BLOCKS = 3;
+    // Set a small-ish blocksize
+    final long blockSize = 512;
+
+    Configuration conf = new Configuration();
+    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
+
+    MiniDFSCluster cluster = null;
+    DistributedFileSystem dfs = null;
+
+    try {
+      // Startup a minicluster
+      cluster =
+          new MiniDFSCluster.Builder(conf).numDataNodes(NUM_REPLICAS).build();
+      assertNotNull("Failed Cluster Creation", cluster);
+      cluster.waitClusterUp();
+      dfs = (DistributedFileSystem) cluster.getFileSystem();
+      assertNotNull("Failed to get FileSystem", dfs);
+
+      // Create a file that will be intentionally under-replicated
+      final String pathString = new String("/testfile");
+      final Path path = new Path(pathString);
+      long fileLen = blockSize * NUM_BLOCKS;
+      DFSTestUtil.createFile(dfs, path, fileLen, REPL_FACTOR, 1);
+
+      // Create an under-replicated file
+      NameNode namenode = cluster.getNameNode();
+      NetworkTopology nettop = cluster.getNamesystem().getBlockManager()
+          .getDatanodeManager().getNetworkTopology();
+      Map<String,String[]> pmap = new HashMap<String, String[]>();
+      Writer result = new StringWriter();
+      PrintWriter out = new PrintWriter(result, true);
+      InetAddress remoteAddress = InetAddress.getLocalHost();
+      NamenodeFsck fsck = new NamenodeFsck(conf, namenode, nettop, pmap, out,
+          NUM_REPLICAS, (short)1, remoteAddress);
+
+      // Run the fsck and check the Result
+      final HdfsFileStatus file =
+          namenode.getRpcServer().getFileInfo(pathString);
+      assertNotNull(file);
+      Result res = new Result(conf);
+      fsck.check(pathString, file, res);
+      // Also print the output from the fsck, for ex post facto sanity checks
+      System.out.println(result.toString());
+      assertEquals(res.missingReplicas,
+          (NUM_BLOCKS*REPL_FACTOR) - (NUM_BLOCKS*NUM_REPLICAS));
+      assertEquals(res.numExpectedReplicas, NUM_BLOCKS*REPL_FACTOR);
+    } finally {
+      if(dfs != null) {
+        dfs.close();
+      }
+      if(cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
 }