HDFS-6663. Admin command to track file and locations from block id.
Contributed by Chen He.
This commit is contained in:
parent
0d3e7e2bd6
commit
371a3b87ed
@ -262,6 +262,9 @@ Release 2.7.0 - UNRELEASED
|
||||
HDFS-7278. Add a command that allows sysadmins to manually trigger full
|
||||
block reports from a DN (cmccabe)
|
||||
|
||||
HDFS-6663. Admin command to track file and locations from block id.
|
||||
(Chen He via kihwal)
|
||||
|
||||
IMPROVEMENTS
|
||||
|
||||
HDFS-7055. Add tracing to DFSInputStream (cmccabe)
|
||||
|
@ -3505,6 +3505,13 @@ public Collection<DatanodeDescriptor> getCorruptReplicas(Block block) {
|
||||
return corruptReplicas.getNodes(block);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get reason for certain corrupted replicas for a given block and a given dn.
|
||||
*/
|
||||
public String getCorruptReason(Block block, DatanodeDescriptor node) {
|
||||
return corruptReplicas.getCorruptReason(block, node);
|
||||
}
|
||||
|
||||
/** @return the size of UnderReplicatedBlocks */
|
||||
public int numOfUnderReplicatedBlocks() {
|
||||
return neededReplications.size();
|
||||
|
@ -224,4 +224,25 @@ long[] getCorruptReplicaBlockIds(int numExpectedBlocks,
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* return the reason about corrupted replica for a given block
|
||||
* on a given dn
|
||||
* @param block block that has corrupted replica
|
||||
* @param node datanode that contains this corrupted replica
|
||||
* @return reason
|
||||
*/
|
||||
String getCorruptReason(Block block, DatanodeDescriptor node) {
|
||||
Reason reason = null;
|
||||
if(corruptReplicasMap.containsKey(block)) {
|
||||
if (corruptReplicasMap.get(block).containsKey(node)) {
|
||||
reason = corruptReplicasMap.get(block).get(node);
|
||||
}
|
||||
}
|
||||
if (reason != null) {
|
||||
return reason.toString();
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -47,6 +47,7 @@
|
||||
import org.apache.hadoop.hdfs.RemotePeerFactory;
|
||||
import org.apache.hadoop.hdfs.net.Peer;
|
||||
import org.apache.hadoop.hdfs.net.TcpPeerServer;
|
||||
import org.apache.hadoop.hdfs.protocol.Block;
|
||||
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||
import org.apache.hadoop.hdfs.protocol.DirectoryListing;
|
||||
@ -59,8 +60,12 @@
|
||||
import org.apache.hadoop.hdfs.protocol.datatransfer.sasl.DataEncryptionKeyFactory;
|
||||
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
|
||||
import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementStatus;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas;
|
||||
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
|
||||
import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
|
||||
@ -103,6 +108,8 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||
// return string marking fsck status
|
||||
public static final String CORRUPT_STATUS = "is CORRUPT";
|
||||
public static final String HEALTHY_STATUS = "is HEALTHY";
|
||||
public static final String DECOMMISSIONING_STATUS = "is DECOMMISSIONING";
|
||||
public static final String DECOMMISSIONED_STATUS = "is DECOMMISSIONED";
|
||||
public static final String NONEXISTENT_STATUS = "does not exist";
|
||||
public static final String FAILURE_STATUS = "FAILED";
|
||||
|
||||
@ -143,7 +150,9 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||
*/
|
||||
private boolean doDelete = false;
|
||||
|
||||
private String path = "/";
|
||||
String path = "/";
|
||||
|
||||
private String blockIds = null;
|
||||
|
||||
// We return back N files that are corrupt; the list of files returned is
|
||||
// ordered by block id; to allow continuation support, pass in the last block
|
||||
@ -195,21 +204,112 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||
else if (key.equals("openforwrite")) {this.showOpenFiles = true; }
|
||||
else if (key.equals("listcorruptfileblocks")) {
|
||||
this.showCorruptFileBlocks = true;
|
||||
}
|
||||
else if (key.equals("startblockafter")) {
|
||||
} else if (key.equals("startblockafter")) {
|
||||
this.currentCookie[0] = pmap.get("startblockafter")[0];
|
||||
} else if (key.equals("includeSnapshots")) {
|
||||
this.snapshottableDirs = new ArrayList<String>();
|
||||
} else if (key.equals("blockId")) {
|
||||
this.blockIds = pmap.get("blockId")[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check block information given a blockId number
|
||||
*
|
||||
*/
|
||||
public void blockIdCK(String blockId) {
|
||||
|
||||
if(blockId == null) {
|
||||
out.println("Please provide valid blockId!");
|
||||
return;
|
||||
}
|
||||
|
||||
BlockManager bm = namenode.getNamesystem().getBlockManager();
|
||||
try {
|
||||
//get blockInfo
|
||||
Block block = new Block(Block.getBlockId(blockId));
|
||||
//find which file this block belongs to
|
||||
BlockInfo blockInfo = bm.getStoredBlock(block);
|
||||
if(blockInfo == null) {
|
||||
out.println("Block "+ blockId +" " + NONEXISTENT_STATUS);
|
||||
LOG.warn("Block "+ blockId + " " + NONEXISTENT_STATUS);
|
||||
return;
|
||||
}
|
||||
BlockCollection bc = bm.getBlockCollection(blockInfo);
|
||||
INode iNode = (INode) bc;
|
||||
NumberReplicas numberReplicas= bm.countNodes(block);
|
||||
out.println("Block Id: " + blockId);
|
||||
out.println("Block belongs to: "+iNode.getFullPathName());
|
||||
out.println("No. of Expected Replica: " + bc.getBlockReplication());
|
||||
out.println("No. of live Replica: " + numberReplicas.liveReplicas());
|
||||
out.println("No. of excess Replica: " + numberReplicas.excessReplicas());
|
||||
out.println("No. of stale Replica: " + numberReplicas.replicasOnStaleNodes());
|
||||
out.println("No. of decommission Replica: "
|
||||
+ numberReplicas.decommissionedReplicas());
|
||||
out.println("No. of corrupted Replica: " + numberReplicas.corruptReplicas());
|
||||
//record datanodes that have corrupted block replica
|
||||
Collection<DatanodeDescriptor> corruptionRecord = null;
|
||||
if (bm.getCorruptReplicas(block) != null) {
|
||||
corruptionRecord = bm.getCorruptReplicas(block);
|
||||
}
|
||||
|
||||
//report block replicas status on datanodes
|
||||
for(int idx = (blockInfo.numNodes()-1); idx >= 0; idx--) {
|
||||
DatanodeDescriptor dn = blockInfo.getDatanode(idx);
|
||||
out.print("Block replica on datanode/rack: " + dn.getHostName() +
|
||||
dn.getNetworkLocation() + " ");
|
||||
if (corruptionRecord != null && corruptionRecord.contains(dn)) {
|
||||
out.print(CORRUPT_STATUS+"\t ReasonCode: "+
|
||||
bm.getCorruptReason(block,dn));
|
||||
} else if (dn.isDecommissioned() ){
|
||||
out.print(DECOMMISSIONED_STATUS);
|
||||
} else if (dn.isDecommissionInProgress()) {
|
||||
out.print(DECOMMISSIONING_STATUS);
|
||||
} else {
|
||||
out.print(HEALTHY_STATUS);
|
||||
}
|
||||
out.print("\n");
|
||||
}
|
||||
} catch (Exception e){
|
||||
String errMsg = "Fsck on blockId '" + blockId;
|
||||
LOG.warn(errMsg, e);
|
||||
out.println(e.getMessage());
|
||||
out.print("\n\n" + errMsg);
|
||||
LOG.warn("Error in looking up block", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check files on DFS, starting from the indicated path.
|
||||
*/
|
||||
public void fsck() {
|
||||
final long startTime = Time.now();
|
||||
try {
|
||||
if(blockIds != null) {
|
||||
|
||||
String[] blocks = blockIds.split(" ");
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("FSCK started by " +
|
||||
UserGroupInformation.getCurrentUser() + " from " +
|
||||
remoteAddress + " at " + new Date());
|
||||
out.println(sb.toString());
|
||||
sb.append(" for blockIds: \n");
|
||||
for (String blk: blocks) {
|
||||
if(blk == null || !blk.contains("blk_")) {
|
||||
out.println("Incorrect blockId format: " + blk);
|
||||
continue;
|
||||
}
|
||||
out.print("\n");
|
||||
blockIdCK(blk);
|
||||
sb.append(blk + "\n");
|
||||
}
|
||||
LOG.info(sb.toString());
|
||||
namenode.getNamesystem().logFsckEvent("/", remoteAddress);
|
||||
out.flush();
|
||||
return;
|
||||
}
|
||||
|
||||
String msg = "FSCK started by " + UserGroupInformation.getCurrentUser()
|
||||
+ " from " + remoteAddress + " for path " + path + " at " + new Date();
|
||||
LOG.info(msg);
|
||||
|
@ -92,7 +92,10 @@ public class DFSck extends Configured implements Tool {
|
||||
+ "\t-blocks\tprint out block report\n"
|
||||
+ "\t-locations\tprint out locations for every block\n"
|
||||
+ "\t-racks\tprint out network topology for data-node locations\n"
|
||||
+ "\t-showprogress\tshow progress in output. Default is OFF (no progress)\n\n"
|
||||
+ "\t-showprogress\tshow progress in output. Default is OFF (no progress)\n"
|
||||
+ "\t-blockId\tprint out which file this blockId belongs to, locations"
|
||||
+ " (nodes, racks) of this block, and other diagnostics info"
|
||||
+ " (under replicated, corrupted or not, etc)\n\n"
|
||||
+ "Please Note:\n"
|
||||
+ "\t1. By default fsck ignores files opened for write, "
|
||||
+ "use -openforwrite to report such files. They are usually "
|
||||
@ -278,6 +281,15 @@ else if (args[idx].equals("-list-corruptfileblocks")) {
|
||||
doListCorruptFileBlocks = true;
|
||||
} else if (args[idx].equals("-includeSnapshots")) {
|
||||
url.append("&includeSnapshots=1");
|
||||
} else if (args[idx].equals("-blockId")) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
idx++;
|
||||
while(idx < args.length && !args[idx].startsWith("-")){
|
||||
sb.append(args[idx]);
|
||||
sb.append(" ");
|
||||
idx++;
|
||||
}
|
||||
url.append("&blockId=").append(URLEncoder.encode(sb.toString(), "UTF-8"));
|
||||
} else if (!args[idx].startsWith("-")) {
|
||||
if (null == dir) {
|
||||
dir = args[idx];
|
||||
@ -287,6 +299,7 @@ else if (args[idx].equals("-list-corruptfileblocks")) {
|
||||
printUsage(System.err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
} else {
|
||||
System.err.println("fsck: Illegal option '" + args[idx] + "'");
|
||||
printUsage(System.err);
|
||||
@ -327,6 +340,12 @@ else if (args[idx].equals("-list-corruptfileblocks")) {
|
||||
errCode = 1;
|
||||
} else if (lastLine.endsWith(NamenodeFsck.NONEXISTENT_STATUS)) {
|
||||
errCode = 0;
|
||||
} else if (lastLine.contains("Incorrect blockId format:")) {
|
||||
errCode = 0;
|
||||
} else if (lastLine.endsWith(NamenodeFsck.DECOMMISSIONED_STATUS)) {
|
||||
errCode = 2;
|
||||
} else if (lastLine.endsWith(NamenodeFsck.DECOMMISSIONING_STATUS)) {
|
||||
errCode = 3;
|
||||
}
|
||||
return errCode;
|
||||
}
|
||||
|
@ -18,13 +18,6 @@
|
||||
|
||||
package org.apache.hadoop.hdfs.server.namenode;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
@ -48,6 +41,7 @@
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.google.common.collect.Sets;
|
||||
import org.apache.commons.logging.impl.Log4JLogger;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
@ -66,11 +60,14 @@
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.hdfs.protocol.Block;
|
||||
import org.apache.hadoop.hdfs.protocol.CorruptFileBlocks;
|
||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NamenodeFsck.Result;
|
||||
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
|
||||
@ -86,8 +83,17 @@
|
||||
import org.apache.log4j.RollingFileAppender;
|
||||
import org.junit.Test;
|
||||
|
||||
import com.google.common.collect.Sets;
|
||||
import static org.mockito.Mockito.*;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
import static org.mockito.Matchers.anyBoolean;
|
||||
import static org.mockito.Matchers.anyLong;
|
||||
import static org.mockito.Matchers.anyString;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
/**
|
||||
* A JUnit test for doing fsck
|
||||
@ -1096,4 +1102,227 @@ public void testFsckForSnapshotFiles() throws Exception {
|
||||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test for blockIdCK
|
||||
*/
|
||||
|
||||
@Test
|
||||
public void testBlockIdCK() throws Exception {
|
||||
|
||||
final short REPL_FACTOR = 2;
|
||||
short NUM_DN = 2;
|
||||
final long blockSize = 512;
|
||||
|
||||
String [] racks = {"/rack1", "/rack2"};
|
||||
String [] hosts = {"host1", "host2"};
|
||||
|
||||
Configuration conf = new Configuration();
|
||||
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
|
||||
conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 2);
|
||||
|
||||
MiniDFSCluster cluster = null;
|
||||
DistributedFileSystem dfs = null;
|
||||
cluster =
|
||||
new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DN).hosts(hosts)
|
||||
.racks(racks).build();
|
||||
|
||||
assertNotNull("Failed Cluster Creation", cluster);
|
||||
cluster.waitClusterUp();
|
||||
dfs = cluster.getFileSystem();
|
||||
assertNotNull("Failed to get FileSystem", dfs);
|
||||
|
||||
DFSTestUtil util = new DFSTestUtil.Builder().
|
||||
setName(getClass().getSimpleName()).setNumFiles(1).build();
|
||||
//create files
|
||||
final String pathString = new String("/testfile");
|
||||
final Path path = new Path(pathString);
|
||||
util.createFile(dfs, path, 1024, REPL_FACTOR , 1000L);
|
||||
util.waitReplication(dfs, path, REPL_FACTOR);
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (LocatedBlock lb: util.getAllBlocks(dfs, path)){
|
||||
sb.append(lb.getBlock().getLocalBlock().getBlockName()+" ");
|
||||
}
|
||||
String[] bIds = sb.toString().split(" ");
|
||||
|
||||
//run fsck
|
||||
try {
|
||||
//illegal input test
|
||||
String runFsckResult = runFsck(conf, 0, true, "/", "-blockId",
|
||||
"not_a_block_id");
|
||||
assertTrue(runFsckResult.contains("Incorrect blockId format:"));
|
||||
|
||||
//general test
|
||||
runFsckResult = runFsck(conf, 0, true, "/", "-blockId", sb.toString());
|
||||
assertTrue(runFsckResult.contains(bIds[0]));
|
||||
assertTrue(runFsckResult.contains(bIds[1]));
|
||||
assertTrue(runFsckResult.contains(
|
||||
"Block replica on datanode/rack: host1/rack1 is HEALTHY"));
|
||||
assertTrue(runFsckResult.contains(
|
||||
"Block replica on datanode/rack: host2/rack2 is HEALTHY"));
|
||||
} finally {
|
||||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test for blockIdCK with datanode decommission
|
||||
*/
|
||||
@Test
|
||||
public void testBlockIdCKDecommission() throws Exception {
|
||||
|
||||
final short REPL_FACTOR = 1;
|
||||
short NUM_DN = 2;
|
||||
final long blockSize = 512;
|
||||
boolean checkDecommissionInProgress = false;
|
||||
String [] racks = {"/rack1", "/rack2"};
|
||||
String [] hosts = {"host1", "host2"};
|
||||
|
||||
Configuration conf = new Configuration();
|
||||
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
|
||||
conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 2);
|
||||
|
||||
MiniDFSCluster cluster;
|
||||
DistributedFileSystem dfs ;
|
||||
cluster =
|
||||
new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DN).hosts(hosts)
|
||||
.racks(racks).build();
|
||||
|
||||
assertNotNull("Failed Cluster Creation", cluster);
|
||||
cluster.waitClusterUp();
|
||||
dfs = cluster.getFileSystem();
|
||||
assertNotNull("Failed to get FileSystem", dfs);
|
||||
|
||||
DFSTestUtil util = new DFSTestUtil.Builder().
|
||||
setName(getClass().getSimpleName()).setNumFiles(1).build();
|
||||
//create files
|
||||
final String pathString = new String("/testfile");
|
||||
final Path path = new Path(pathString);
|
||||
util.createFile(dfs, path, 1024, REPL_FACTOR, 1000L);
|
||||
util.waitReplication(dfs, path, REPL_FACTOR);
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (LocatedBlock lb: util.getAllBlocks(dfs, path)){
|
||||
sb.append(lb.getBlock().getLocalBlock().getBlockName()+" ");
|
||||
}
|
||||
String[] bIds = sb.toString().split(" ");
|
||||
try {
|
||||
//make sure datanode that has replica is fine before decommission
|
||||
String outStr = runFsck(conf, 0, true, "/", "-blockId", bIds[0]);
|
||||
System.out.println(outStr);
|
||||
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
|
||||
|
||||
//decommission datanode
|
||||
ExtendedBlock eb = util.getFirstBlock(dfs, path);
|
||||
DatanodeDescriptor dn = cluster.getNameNode().getNamesystem()
|
||||
.getBlockManager().getBlockCollection(eb.getLocalBlock())
|
||||
.getBlocks()[0].getDatanode(0);
|
||||
cluster.getNameNode().getNamesystem().getBlockManager()
|
||||
.getDatanodeManager().startDecommission(dn);
|
||||
String dnName = dn.getXferAddr();
|
||||
|
||||
//wait for decommission start
|
||||
DatanodeInfo datanodeInfo = null;
|
||||
int count = 0;
|
||||
do {
|
||||
Thread.sleep(2000);
|
||||
for (DatanodeInfo info : dfs.getDataNodeStats()) {
|
||||
if (dnName.equals(info.getXferAddr())) {
|
||||
datanodeInfo = info;
|
||||
}
|
||||
}
|
||||
//check decommissioning only once
|
||||
if(!checkDecommissionInProgress && datanodeInfo != null
|
||||
&& datanodeInfo.isDecommissionInProgress()) {
|
||||
String fsckOut = runFsck(conf, 3, true, "/", "-blockId", bIds[0]);
|
||||
assertTrue(fsckOut.contains(NamenodeFsck.DECOMMISSIONING_STATUS));
|
||||
checkDecommissionInProgress = true;
|
||||
}
|
||||
} while (datanodeInfo != null && !datanodeInfo.isDecommissioned());
|
||||
|
||||
//check decommissioned
|
||||
String fsckOut = runFsck(conf, 2, true, "/", "-blockId", bIds[0]);
|
||||
assertTrue(fsckOut.contains(NamenodeFsck.DECOMMISSIONED_STATUS));
|
||||
} finally {
|
||||
if (cluster != null) {
|
||||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test for blockIdCK with block corruption
|
||||
*/
|
||||
@Test
|
||||
public void testBlockIdCKCorruption() throws Exception {
|
||||
short NUM_DN = 1;
|
||||
final long blockSize = 512;
|
||||
Random random = new Random();
|
||||
DFSClient dfsClient;
|
||||
LocatedBlocks blocks;
|
||||
ExtendedBlock block;
|
||||
short repFactor = 1;
|
||||
String [] racks = {"/rack1"};
|
||||
String [] hosts = {"host1"};
|
||||
|
||||
Configuration conf = new Configuration();
|
||||
conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
|
||||
// Set short retry timeouts so this test runs faster
|
||||
conf.setInt(DFSConfigKeys.DFS_CLIENT_RETRY_WINDOW_BASE, 10);
|
||||
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
|
||||
conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1);
|
||||
|
||||
MiniDFSCluster cluster = null;
|
||||
DistributedFileSystem dfs = null;
|
||||
try {
|
||||
cluster =
|
||||
new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DN).hosts(hosts)
|
||||
.racks(racks).build();
|
||||
|
||||
assertNotNull("Failed Cluster Creation", cluster);
|
||||
cluster.waitClusterUp();
|
||||
dfs = cluster.getFileSystem();
|
||||
assertNotNull("Failed to get FileSystem", dfs);
|
||||
|
||||
DFSTestUtil util = new DFSTestUtil.Builder().
|
||||
setName(getClass().getSimpleName()).setNumFiles(1).build();
|
||||
//create files
|
||||
final String pathString = new String("/testfile");
|
||||
final Path path = new Path(pathString);
|
||||
util.createFile(dfs, path, 1024, repFactor, 1000L);
|
||||
util.waitReplication(dfs, path, repFactor);
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (LocatedBlock lb: util.getAllBlocks(dfs, path)){
|
||||
sb.append(lb.getBlock().getLocalBlock().getBlockName()+" ");
|
||||
}
|
||||
String[] bIds = sb.toString().split(" ");
|
||||
|
||||
//make sure block is healthy before we corrupt it
|
||||
String outStr = runFsck(conf, 0, true, "/", "-blockId", bIds[0]);
|
||||
System.out.println(outStr);
|
||||
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
|
||||
|
||||
// corrupt replicas
|
||||
block = DFSTestUtil.getFirstBlock(dfs, path);
|
||||
File blockFile = MiniDFSCluster.getBlockFile(0, block);
|
||||
if (blockFile != null && blockFile.exists()) {
|
||||
RandomAccessFile raFile = new RandomAccessFile(blockFile, "rw");
|
||||
FileChannel channel = raFile.getChannel();
|
||||
String badString = "BADBAD";
|
||||
int rand = random.nextInt((int) channel.size()/2);
|
||||
raFile.seek(rand);
|
||||
raFile.write(badString.getBytes());
|
||||
raFile.close();
|
||||
}
|
||||
|
||||
util.waitCorruptReplicas(dfs, cluster.getNamesystem(), path, block, 1);
|
||||
|
||||
outStr = runFsck(conf, 1, false, "/", "-blockId", block.getBlockName());
|
||||
System.out.println(outStr);
|
||||
assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
|
||||
} finally {
|
||||
if (cluster != null) {
|
||||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user