HDFS-6663. Admin command to track file and locations from block id.
Contributed by Chen He.
This commit is contained in:
parent
0d3e7e2bd6
commit
371a3b87ed
@ -262,6 +262,9 @@ Release 2.7.0 - UNRELEASED
|
|||||||
HDFS-7278. Add a command that allows sysadmins to manually trigger full
|
HDFS-7278. Add a command that allows sysadmins to manually trigger full
|
||||||
block reports from a DN (cmccabe)
|
block reports from a DN (cmccabe)
|
||||||
|
|
||||||
|
HDFS-6663. Admin command to track file and locations from block id.
|
||||||
|
(Chen He via kihwal)
|
||||||
|
|
||||||
IMPROVEMENTS
|
IMPROVEMENTS
|
||||||
|
|
||||||
HDFS-7055. Add tracing to DFSInputStream (cmccabe)
|
HDFS-7055. Add tracing to DFSInputStream (cmccabe)
|
||||||
|
@ -3505,6 +3505,13 @@ public Collection<DatanodeDescriptor> getCorruptReplicas(Block block) {
|
|||||||
return corruptReplicas.getNodes(block);
|
return corruptReplicas.getNodes(block);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get reason for certain corrupted replicas for a given block and a given dn.
|
||||||
|
*/
|
||||||
|
public String getCorruptReason(Block block, DatanodeDescriptor node) {
|
||||||
|
return corruptReplicas.getCorruptReason(block, node);
|
||||||
|
}
|
||||||
|
|
||||||
/** @return the size of UnderReplicatedBlocks */
|
/** @return the size of UnderReplicatedBlocks */
|
||||||
public int numOfUnderReplicatedBlocks() {
|
public int numOfUnderReplicatedBlocks() {
|
||||||
return neededReplications.size();
|
return neededReplications.size();
|
||||||
|
@ -224,4 +224,25 @@ long[] getCorruptReplicaBlockIds(int numExpectedBlocks,
|
|||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* return the reason about corrupted replica for a given block
|
||||||
|
* on a given dn
|
||||||
|
* @param block block that has corrupted replica
|
||||||
|
* @param node datanode that contains this corrupted replica
|
||||||
|
* @return reason
|
||||||
|
*/
|
||||||
|
String getCorruptReason(Block block, DatanodeDescriptor node) {
|
||||||
|
Reason reason = null;
|
||||||
|
if(corruptReplicasMap.containsKey(block)) {
|
||||||
|
if (corruptReplicasMap.get(block).containsKey(node)) {
|
||||||
|
reason = corruptReplicasMap.get(block).get(node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (reason != null) {
|
||||||
|
return reason.toString();
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -47,6 +47,7 @@
|
|||||||
import org.apache.hadoop.hdfs.RemotePeerFactory;
|
import org.apache.hadoop.hdfs.RemotePeerFactory;
|
||||||
import org.apache.hadoop.hdfs.net.Peer;
|
import org.apache.hadoop.hdfs.net.Peer;
|
||||||
import org.apache.hadoop.hdfs.net.TcpPeerServer;
|
import org.apache.hadoop.hdfs.net.TcpPeerServer;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.Block;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
import org.apache.hadoop.hdfs.protocol.DatanodeID;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||||
import org.apache.hadoop.hdfs.protocol.DirectoryListing;
|
import org.apache.hadoop.hdfs.protocol.DirectoryListing;
|
||||||
@ -59,8 +60,12 @@
|
|||||||
import org.apache.hadoop.hdfs.protocol.datatransfer.sasl.DataEncryptionKeyFactory;
|
import org.apache.hadoop.hdfs.protocol.datatransfer.sasl.DataEncryptionKeyFactory;
|
||||||
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
|
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
|
||||||
import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey;
|
import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey;
|
||||||
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
|
||||||
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
|
||||||
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementStatus;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementStatus;
|
||||||
|
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas;
|
import org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas;
|
||||||
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
|
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
|
||||||
import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
|
import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
|
||||||
@ -103,6 +108,8 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
|||||||
// return string marking fsck status
|
// return string marking fsck status
|
||||||
public static final String CORRUPT_STATUS = "is CORRUPT";
|
public static final String CORRUPT_STATUS = "is CORRUPT";
|
||||||
public static final String HEALTHY_STATUS = "is HEALTHY";
|
public static final String HEALTHY_STATUS = "is HEALTHY";
|
||||||
|
public static final String DECOMMISSIONING_STATUS = "is DECOMMISSIONING";
|
||||||
|
public static final String DECOMMISSIONED_STATUS = "is DECOMMISSIONED";
|
||||||
public static final String NONEXISTENT_STATUS = "does not exist";
|
public static final String NONEXISTENT_STATUS = "does not exist";
|
||||||
public static final String FAILURE_STATUS = "FAILED";
|
public static final String FAILURE_STATUS = "FAILED";
|
||||||
|
|
||||||
@ -143,7 +150,9 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
|||||||
*/
|
*/
|
||||||
private boolean doDelete = false;
|
private boolean doDelete = false;
|
||||||
|
|
||||||
private String path = "/";
|
String path = "/";
|
||||||
|
|
||||||
|
private String blockIds = null;
|
||||||
|
|
||||||
// We return back N files that are corrupt; the list of files returned is
|
// We return back N files that are corrupt; the list of files returned is
|
||||||
// ordered by block id; to allow continuation support, pass in the last block
|
// ordered by block id; to allow continuation support, pass in the last block
|
||||||
@ -195,21 +204,112 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
|||||||
else if (key.equals("openforwrite")) {this.showOpenFiles = true; }
|
else if (key.equals("openforwrite")) {this.showOpenFiles = true; }
|
||||||
else if (key.equals("listcorruptfileblocks")) {
|
else if (key.equals("listcorruptfileblocks")) {
|
||||||
this.showCorruptFileBlocks = true;
|
this.showCorruptFileBlocks = true;
|
||||||
}
|
} else if (key.equals("startblockafter")) {
|
||||||
else if (key.equals("startblockafter")) {
|
|
||||||
this.currentCookie[0] = pmap.get("startblockafter")[0];
|
this.currentCookie[0] = pmap.get("startblockafter")[0];
|
||||||
} else if (key.equals("includeSnapshots")) {
|
} else if (key.equals("includeSnapshots")) {
|
||||||
this.snapshottableDirs = new ArrayList<String>();
|
this.snapshottableDirs = new ArrayList<String>();
|
||||||
|
} else if (key.equals("blockId")) {
|
||||||
|
this.blockIds = pmap.get("blockId")[0];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check block information given a blockId number
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public void blockIdCK(String blockId) {
|
||||||
|
|
||||||
|
if(blockId == null) {
|
||||||
|
out.println("Please provide valid blockId!");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
BlockManager bm = namenode.getNamesystem().getBlockManager();
|
||||||
|
try {
|
||||||
|
//get blockInfo
|
||||||
|
Block block = new Block(Block.getBlockId(blockId));
|
||||||
|
//find which file this block belongs to
|
||||||
|
BlockInfo blockInfo = bm.getStoredBlock(block);
|
||||||
|
if(blockInfo == null) {
|
||||||
|
out.println("Block "+ blockId +" " + NONEXISTENT_STATUS);
|
||||||
|
LOG.warn("Block "+ blockId + " " + NONEXISTENT_STATUS);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
BlockCollection bc = bm.getBlockCollection(blockInfo);
|
||||||
|
INode iNode = (INode) bc;
|
||||||
|
NumberReplicas numberReplicas= bm.countNodes(block);
|
||||||
|
out.println("Block Id: " + blockId);
|
||||||
|
out.println("Block belongs to: "+iNode.getFullPathName());
|
||||||
|
out.println("No. of Expected Replica: " + bc.getBlockReplication());
|
||||||
|
out.println("No. of live Replica: " + numberReplicas.liveReplicas());
|
||||||
|
out.println("No. of excess Replica: " + numberReplicas.excessReplicas());
|
||||||
|
out.println("No. of stale Replica: " + numberReplicas.replicasOnStaleNodes());
|
||||||
|
out.println("No. of decommission Replica: "
|
||||||
|
+ numberReplicas.decommissionedReplicas());
|
||||||
|
out.println("No. of corrupted Replica: " + numberReplicas.corruptReplicas());
|
||||||
|
//record datanodes that have corrupted block replica
|
||||||
|
Collection<DatanodeDescriptor> corruptionRecord = null;
|
||||||
|
if (bm.getCorruptReplicas(block) != null) {
|
||||||
|
corruptionRecord = bm.getCorruptReplicas(block);
|
||||||
|
}
|
||||||
|
|
||||||
|
//report block replicas status on datanodes
|
||||||
|
for(int idx = (blockInfo.numNodes()-1); idx >= 0; idx--) {
|
||||||
|
DatanodeDescriptor dn = blockInfo.getDatanode(idx);
|
||||||
|
out.print("Block replica on datanode/rack: " + dn.getHostName() +
|
||||||
|
dn.getNetworkLocation() + " ");
|
||||||
|
if (corruptionRecord != null && corruptionRecord.contains(dn)) {
|
||||||
|
out.print(CORRUPT_STATUS+"\t ReasonCode: "+
|
||||||
|
bm.getCorruptReason(block,dn));
|
||||||
|
} else if (dn.isDecommissioned() ){
|
||||||
|
out.print(DECOMMISSIONED_STATUS);
|
||||||
|
} else if (dn.isDecommissionInProgress()) {
|
||||||
|
out.print(DECOMMISSIONING_STATUS);
|
||||||
|
} else {
|
||||||
|
out.print(HEALTHY_STATUS);
|
||||||
|
}
|
||||||
|
out.print("\n");
|
||||||
|
}
|
||||||
|
} catch (Exception e){
|
||||||
|
String errMsg = "Fsck on blockId '" + blockId;
|
||||||
|
LOG.warn(errMsg, e);
|
||||||
|
out.println(e.getMessage());
|
||||||
|
out.print("\n\n" + errMsg);
|
||||||
|
LOG.warn("Error in looking up block", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check files on DFS, starting from the indicated path.
|
* Check files on DFS, starting from the indicated path.
|
||||||
*/
|
*/
|
||||||
public void fsck() {
|
public void fsck() {
|
||||||
final long startTime = Time.now();
|
final long startTime = Time.now();
|
||||||
try {
|
try {
|
||||||
|
if(blockIds != null) {
|
||||||
|
|
||||||
|
String[] blocks = blockIds.split(" ");
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append("FSCK started by " +
|
||||||
|
UserGroupInformation.getCurrentUser() + " from " +
|
||||||
|
remoteAddress + " at " + new Date());
|
||||||
|
out.println(sb.toString());
|
||||||
|
sb.append(" for blockIds: \n");
|
||||||
|
for (String blk: blocks) {
|
||||||
|
if(blk == null || !blk.contains("blk_")) {
|
||||||
|
out.println("Incorrect blockId format: " + blk);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
out.print("\n");
|
||||||
|
blockIdCK(blk);
|
||||||
|
sb.append(blk + "\n");
|
||||||
|
}
|
||||||
|
LOG.info(sb.toString());
|
||||||
|
namenode.getNamesystem().logFsckEvent("/", remoteAddress);
|
||||||
|
out.flush();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
String msg = "FSCK started by " + UserGroupInformation.getCurrentUser()
|
String msg = "FSCK started by " + UserGroupInformation.getCurrentUser()
|
||||||
+ " from " + remoteAddress + " for path " + path + " at " + new Date();
|
+ " from " + remoteAddress + " for path " + path + " at " + new Date();
|
||||||
LOG.info(msg);
|
LOG.info(msg);
|
||||||
|
@ -92,7 +92,10 @@ public class DFSck extends Configured implements Tool {
|
|||||||
+ "\t-blocks\tprint out block report\n"
|
+ "\t-blocks\tprint out block report\n"
|
||||||
+ "\t-locations\tprint out locations for every block\n"
|
+ "\t-locations\tprint out locations for every block\n"
|
||||||
+ "\t-racks\tprint out network topology for data-node locations\n"
|
+ "\t-racks\tprint out network topology for data-node locations\n"
|
||||||
+ "\t-showprogress\tshow progress in output. Default is OFF (no progress)\n\n"
|
+ "\t-showprogress\tshow progress in output. Default is OFF (no progress)\n"
|
||||||
|
+ "\t-blockId\tprint out which file this blockId belongs to, locations"
|
||||||
|
+ " (nodes, racks) of this block, and other diagnostics info"
|
||||||
|
+ " (under replicated, corrupted or not, etc)\n\n"
|
||||||
+ "Please Note:\n"
|
+ "Please Note:\n"
|
||||||
+ "\t1. By default fsck ignores files opened for write, "
|
+ "\t1. By default fsck ignores files opened for write, "
|
||||||
+ "use -openforwrite to report such files. They are usually "
|
+ "use -openforwrite to report such files. They are usually "
|
||||||
@ -278,6 +281,15 @@ else if (args[idx].equals("-list-corruptfileblocks")) {
|
|||||||
doListCorruptFileBlocks = true;
|
doListCorruptFileBlocks = true;
|
||||||
} else if (args[idx].equals("-includeSnapshots")) {
|
} else if (args[idx].equals("-includeSnapshots")) {
|
||||||
url.append("&includeSnapshots=1");
|
url.append("&includeSnapshots=1");
|
||||||
|
} else if (args[idx].equals("-blockId")) {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
idx++;
|
||||||
|
while(idx < args.length && !args[idx].startsWith("-")){
|
||||||
|
sb.append(args[idx]);
|
||||||
|
sb.append(" ");
|
||||||
|
idx++;
|
||||||
|
}
|
||||||
|
url.append("&blockId=").append(URLEncoder.encode(sb.toString(), "UTF-8"));
|
||||||
} else if (!args[idx].startsWith("-")) {
|
} else if (!args[idx].startsWith("-")) {
|
||||||
if (null == dir) {
|
if (null == dir) {
|
||||||
dir = args[idx];
|
dir = args[idx];
|
||||||
@ -287,6 +299,7 @@ else if (args[idx].equals("-list-corruptfileblocks")) {
|
|||||||
printUsage(System.err);
|
printUsage(System.err);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
System.err.println("fsck: Illegal option '" + args[idx] + "'");
|
System.err.println("fsck: Illegal option '" + args[idx] + "'");
|
||||||
printUsage(System.err);
|
printUsage(System.err);
|
||||||
@ -327,6 +340,12 @@ else if (args[idx].equals("-list-corruptfileblocks")) {
|
|||||||
errCode = 1;
|
errCode = 1;
|
||||||
} else if (lastLine.endsWith(NamenodeFsck.NONEXISTENT_STATUS)) {
|
} else if (lastLine.endsWith(NamenodeFsck.NONEXISTENT_STATUS)) {
|
||||||
errCode = 0;
|
errCode = 0;
|
||||||
|
} else if (lastLine.contains("Incorrect blockId format:")) {
|
||||||
|
errCode = 0;
|
||||||
|
} else if (lastLine.endsWith(NamenodeFsck.DECOMMISSIONED_STATUS)) {
|
||||||
|
errCode = 2;
|
||||||
|
} else if (lastLine.endsWith(NamenodeFsck.DECOMMISSIONING_STATUS)) {
|
||||||
|
errCode = 3;
|
||||||
}
|
}
|
||||||
return errCode;
|
return errCode;
|
||||||
}
|
}
|
||||||
|
@ -18,13 +18,6 @@
|
|||||||
|
|
||||||
package org.apache.hadoop.hdfs.server.namenode;
|
package org.apache.hadoop.hdfs.server.namenode;
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
|
||||||
import static org.junit.Assert.assertFalse;
|
|
||||||
import static org.junit.Assert.assertNotNull;
|
|
||||||
import static org.junit.Assert.assertNull;
|
|
||||||
import static org.junit.Assert.assertTrue;
|
|
||||||
import static org.junit.Assert.fail;
|
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
@ -48,6 +41,7 @@
|
|||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import com.google.common.collect.Sets;
|
||||||
import org.apache.commons.logging.impl.Log4JLogger;
|
import org.apache.commons.logging.impl.Log4JLogger;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
@ -66,11 +60,14 @@
|
|||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||||
import org.apache.hadoop.hdfs.protocol.Block;
|
import org.apache.hadoop.hdfs.protocol.Block;
|
||||||
import org.apache.hadoop.hdfs.protocol.CorruptFileBlocks;
|
import org.apache.hadoop.hdfs.protocol.CorruptFileBlocks;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||||
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
|
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
||||||
|
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
|
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NamenodeFsck.Result;
|
import org.apache.hadoop.hdfs.server.namenode.NamenodeFsck.Result;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
|
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
|
||||||
@ -86,8 +83,17 @@
|
|||||||
import org.apache.log4j.RollingFileAppender;
|
import org.apache.log4j.RollingFileAppender;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import com.google.common.collect.Sets;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.mockito.Mockito.*;
|
import static org.junit.Assert.assertFalse;
|
||||||
|
import static org.junit.Assert.assertNotNull;
|
||||||
|
import static org.junit.Assert.assertNull;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
import static org.junit.Assert.fail;
|
||||||
|
import static org.mockito.Matchers.anyBoolean;
|
||||||
|
import static org.mockito.Matchers.anyLong;
|
||||||
|
import static org.mockito.Matchers.anyString;
|
||||||
|
import static org.mockito.Mockito.mock;
|
||||||
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A JUnit test for doing fsck
|
* A JUnit test for doing fsck
|
||||||
@ -1096,4 +1102,227 @@ public void testFsckForSnapshotFiles() throws Exception {
|
|||||||
cluster.shutdown();
|
cluster.shutdown();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test for blockIdCK
|
||||||
|
*/
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBlockIdCK() throws Exception {
|
||||||
|
|
||||||
|
final short REPL_FACTOR = 2;
|
||||||
|
short NUM_DN = 2;
|
||||||
|
final long blockSize = 512;
|
||||||
|
|
||||||
|
String [] racks = {"/rack1", "/rack2"};
|
||||||
|
String [] hosts = {"host1", "host2"};
|
||||||
|
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
|
||||||
|
conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 2);
|
||||||
|
|
||||||
|
MiniDFSCluster cluster = null;
|
||||||
|
DistributedFileSystem dfs = null;
|
||||||
|
cluster =
|
||||||
|
new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DN).hosts(hosts)
|
||||||
|
.racks(racks).build();
|
||||||
|
|
||||||
|
assertNotNull("Failed Cluster Creation", cluster);
|
||||||
|
cluster.waitClusterUp();
|
||||||
|
dfs = cluster.getFileSystem();
|
||||||
|
assertNotNull("Failed to get FileSystem", dfs);
|
||||||
|
|
||||||
|
DFSTestUtil util = new DFSTestUtil.Builder().
|
||||||
|
setName(getClass().getSimpleName()).setNumFiles(1).build();
|
||||||
|
//create files
|
||||||
|
final String pathString = new String("/testfile");
|
||||||
|
final Path path = new Path(pathString);
|
||||||
|
util.createFile(dfs, path, 1024, REPL_FACTOR , 1000L);
|
||||||
|
util.waitReplication(dfs, path, REPL_FACTOR);
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for (LocatedBlock lb: util.getAllBlocks(dfs, path)){
|
||||||
|
sb.append(lb.getBlock().getLocalBlock().getBlockName()+" ");
|
||||||
|
}
|
||||||
|
String[] bIds = sb.toString().split(" ");
|
||||||
|
|
||||||
|
//run fsck
|
||||||
|
try {
|
||||||
|
//illegal input test
|
||||||
|
String runFsckResult = runFsck(conf, 0, true, "/", "-blockId",
|
||||||
|
"not_a_block_id");
|
||||||
|
assertTrue(runFsckResult.contains("Incorrect blockId format:"));
|
||||||
|
|
||||||
|
//general test
|
||||||
|
runFsckResult = runFsck(conf, 0, true, "/", "-blockId", sb.toString());
|
||||||
|
assertTrue(runFsckResult.contains(bIds[0]));
|
||||||
|
assertTrue(runFsckResult.contains(bIds[1]));
|
||||||
|
assertTrue(runFsckResult.contains(
|
||||||
|
"Block replica on datanode/rack: host1/rack1 is HEALTHY"));
|
||||||
|
assertTrue(runFsckResult.contains(
|
||||||
|
"Block replica on datanode/rack: host2/rack2 is HEALTHY"));
|
||||||
|
} finally {
|
||||||
|
cluster.shutdown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test for blockIdCK with datanode decommission
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testBlockIdCKDecommission() throws Exception {
|
||||||
|
|
||||||
|
final short REPL_FACTOR = 1;
|
||||||
|
short NUM_DN = 2;
|
||||||
|
final long blockSize = 512;
|
||||||
|
boolean checkDecommissionInProgress = false;
|
||||||
|
String [] racks = {"/rack1", "/rack2"};
|
||||||
|
String [] hosts = {"host1", "host2"};
|
||||||
|
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
|
||||||
|
conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 2);
|
||||||
|
|
||||||
|
MiniDFSCluster cluster;
|
||||||
|
DistributedFileSystem dfs ;
|
||||||
|
cluster =
|
||||||
|
new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DN).hosts(hosts)
|
||||||
|
.racks(racks).build();
|
||||||
|
|
||||||
|
assertNotNull("Failed Cluster Creation", cluster);
|
||||||
|
cluster.waitClusterUp();
|
||||||
|
dfs = cluster.getFileSystem();
|
||||||
|
assertNotNull("Failed to get FileSystem", dfs);
|
||||||
|
|
||||||
|
DFSTestUtil util = new DFSTestUtil.Builder().
|
||||||
|
setName(getClass().getSimpleName()).setNumFiles(1).build();
|
||||||
|
//create files
|
||||||
|
final String pathString = new String("/testfile");
|
||||||
|
final Path path = new Path(pathString);
|
||||||
|
util.createFile(dfs, path, 1024, REPL_FACTOR, 1000L);
|
||||||
|
util.waitReplication(dfs, path, REPL_FACTOR);
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for (LocatedBlock lb: util.getAllBlocks(dfs, path)){
|
||||||
|
sb.append(lb.getBlock().getLocalBlock().getBlockName()+" ");
|
||||||
|
}
|
||||||
|
String[] bIds = sb.toString().split(" ");
|
||||||
|
try {
|
||||||
|
//make sure datanode that has replica is fine before decommission
|
||||||
|
String outStr = runFsck(conf, 0, true, "/", "-blockId", bIds[0]);
|
||||||
|
System.out.println(outStr);
|
||||||
|
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
|
||||||
|
|
||||||
|
//decommission datanode
|
||||||
|
ExtendedBlock eb = util.getFirstBlock(dfs, path);
|
||||||
|
DatanodeDescriptor dn = cluster.getNameNode().getNamesystem()
|
||||||
|
.getBlockManager().getBlockCollection(eb.getLocalBlock())
|
||||||
|
.getBlocks()[0].getDatanode(0);
|
||||||
|
cluster.getNameNode().getNamesystem().getBlockManager()
|
||||||
|
.getDatanodeManager().startDecommission(dn);
|
||||||
|
String dnName = dn.getXferAddr();
|
||||||
|
|
||||||
|
//wait for decommission start
|
||||||
|
DatanodeInfo datanodeInfo = null;
|
||||||
|
int count = 0;
|
||||||
|
do {
|
||||||
|
Thread.sleep(2000);
|
||||||
|
for (DatanodeInfo info : dfs.getDataNodeStats()) {
|
||||||
|
if (dnName.equals(info.getXferAddr())) {
|
||||||
|
datanodeInfo = info;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//check decommissioning only once
|
||||||
|
if(!checkDecommissionInProgress && datanodeInfo != null
|
||||||
|
&& datanodeInfo.isDecommissionInProgress()) {
|
||||||
|
String fsckOut = runFsck(conf, 3, true, "/", "-blockId", bIds[0]);
|
||||||
|
assertTrue(fsckOut.contains(NamenodeFsck.DECOMMISSIONING_STATUS));
|
||||||
|
checkDecommissionInProgress = true;
|
||||||
|
}
|
||||||
|
} while (datanodeInfo != null && !datanodeInfo.isDecommissioned());
|
||||||
|
|
||||||
|
//check decommissioned
|
||||||
|
String fsckOut = runFsck(conf, 2, true, "/", "-blockId", bIds[0]);
|
||||||
|
assertTrue(fsckOut.contains(NamenodeFsck.DECOMMISSIONED_STATUS));
|
||||||
|
} finally {
|
||||||
|
if (cluster != null) {
|
||||||
|
cluster.shutdown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test for blockIdCK with block corruption
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testBlockIdCKCorruption() throws Exception {
|
||||||
|
short NUM_DN = 1;
|
||||||
|
final long blockSize = 512;
|
||||||
|
Random random = new Random();
|
||||||
|
DFSClient dfsClient;
|
||||||
|
LocatedBlocks blocks;
|
||||||
|
ExtendedBlock block;
|
||||||
|
short repFactor = 1;
|
||||||
|
String [] racks = {"/rack1"};
|
||||||
|
String [] hosts = {"host1"};
|
||||||
|
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
|
||||||
|
// Set short retry timeouts so this test runs faster
|
||||||
|
conf.setInt(DFSConfigKeys.DFS_CLIENT_RETRY_WINDOW_BASE, 10);
|
||||||
|
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
|
||||||
|
conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1);
|
||||||
|
|
||||||
|
MiniDFSCluster cluster = null;
|
||||||
|
DistributedFileSystem dfs = null;
|
||||||
|
try {
|
||||||
|
cluster =
|
||||||
|
new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DN).hosts(hosts)
|
||||||
|
.racks(racks).build();
|
||||||
|
|
||||||
|
assertNotNull("Failed Cluster Creation", cluster);
|
||||||
|
cluster.waitClusterUp();
|
||||||
|
dfs = cluster.getFileSystem();
|
||||||
|
assertNotNull("Failed to get FileSystem", dfs);
|
||||||
|
|
||||||
|
DFSTestUtil util = new DFSTestUtil.Builder().
|
||||||
|
setName(getClass().getSimpleName()).setNumFiles(1).build();
|
||||||
|
//create files
|
||||||
|
final String pathString = new String("/testfile");
|
||||||
|
final Path path = new Path(pathString);
|
||||||
|
util.createFile(dfs, path, 1024, repFactor, 1000L);
|
||||||
|
util.waitReplication(dfs, path, repFactor);
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for (LocatedBlock lb: util.getAllBlocks(dfs, path)){
|
||||||
|
sb.append(lb.getBlock().getLocalBlock().getBlockName()+" ");
|
||||||
|
}
|
||||||
|
String[] bIds = sb.toString().split(" ");
|
||||||
|
|
||||||
|
//make sure block is healthy before we corrupt it
|
||||||
|
String outStr = runFsck(conf, 0, true, "/", "-blockId", bIds[0]);
|
||||||
|
System.out.println(outStr);
|
||||||
|
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
|
||||||
|
|
||||||
|
// corrupt replicas
|
||||||
|
block = DFSTestUtil.getFirstBlock(dfs, path);
|
||||||
|
File blockFile = MiniDFSCluster.getBlockFile(0, block);
|
||||||
|
if (blockFile != null && blockFile.exists()) {
|
||||||
|
RandomAccessFile raFile = new RandomAccessFile(blockFile, "rw");
|
||||||
|
FileChannel channel = raFile.getChannel();
|
||||||
|
String badString = "BADBAD";
|
||||||
|
int rand = random.nextInt((int) channel.size()/2);
|
||||||
|
raFile.seek(rand);
|
||||||
|
raFile.write(badString.getBytes());
|
||||||
|
raFile.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
util.waitCorruptReplicas(dfs, cluster.getNamesystem(), path, block, 1);
|
||||||
|
|
||||||
|
outStr = runFsck(conf, 1, false, "/", "-blockId", block.getBlockName());
|
||||||
|
System.out.println(outStr);
|
||||||
|
assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
|
||||||
|
} finally {
|
||||||
|
if (cluster != null) {
|
||||||
|
cluster.shutdown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user