HDFS-9070. Allow fsck display pending replica location information for being-written blocks. Contributed by Gao Rui.
This commit is contained in:
parent
59ce780d53
commit
d806a5bf07
@ -1546,6 +1546,9 @@ Release 2.8.0 - UNRELEASED
|
|||||||
HDFS-8647. Abstract BlockManager's rack policy into BlockPlacementPolicy.
|
HDFS-8647. Abstract BlockManager's rack policy into BlockPlacementPolicy.
|
||||||
(Brahma Reddy Battula via mingma)
|
(Brahma Reddy Battula via mingma)
|
||||||
|
|
||||||
|
HDFS-9070. Allow fsck display pending replica location information for
|
||||||
|
being-written blocks. (GAO Rui via jing9)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than
|
HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than
|
||||||
|
@ -122,6 +122,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
|||||||
public static final String FAILURE_STATUS = "FAILED";
|
public static final String FAILURE_STATUS = "FAILED";
|
||||||
|
|
||||||
private final NameNode namenode;
|
private final NameNode namenode;
|
||||||
|
private final BlockManager blockManager;
|
||||||
private final NetworkTopology networktopology;
|
private final NetworkTopology networktopology;
|
||||||
private final int totalDatanodes;
|
private final int totalDatanodes;
|
||||||
private final InetAddress remoteAddress;
|
private final InetAddress remoteAddress;
|
||||||
@ -196,6 +197,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
|||||||
int totalDatanodes, InetAddress remoteAddress) {
|
int totalDatanodes, InetAddress remoteAddress) {
|
||||||
this.conf = conf;
|
this.conf = conf;
|
||||||
this.namenode = namenode;
|
this.namenode = namenode;
|
||||||
|
this.blockManager = namenode.getNamesystem().getBlockManager();
|
||||||
this.networktopology = networktopology;
|
this.networktopology = networktopology;
|
||||||
this.out = out;
|
this.out = out;
|
||||||
this.totalDatanodes = totalDatanodes;
|
this.totalDatanodes = totalDatanodes;
|
||||||
@ -249,24 +251,23 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
BlockManager bm = namenode.getNamesystem().getBlockManager();
|
|
||||||
try {
|
try {
|
||||||
//get blockInfo
|
//get blockInfo
|
||||||
Block block = new Block(Block.getBlockId(blockId));
|
Block block = new Block(Block.getBlockId(blockId));
|
||||||
//find which file this block belongs to
|
//find which file this block belongs to
|
||||||
BlockInfo blockInfo = bm.getStoredBlock(block);
|
BlockInfo blockInfo = blockManager.getStoredBlock(block);
|
||||||
if(blockInfo == null) {
|
if(blockInfo == null) {
|
||||||
out.println("Block "+ blockId +" " + NONEXISTENT_STATUS);
|
out.println("Block "+ blockId +" " + NONEXISTENT_STATUS);
|
||||||
LOG.warn("Block "+ blockId + " " + NONEXISTENT_STATUS);
|
LOG.warn("Block "+ blockId + " " + NONEXISTENT_STATUS);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
BlockCollection bc = bm.getBlockCollection(blockInfo);
|
BlockCollection bc = blockManager.getBlockCollection(blockInfo);
|
||||||
INode iNode = (INode) bc;
|
INode iNode = (INode) bc;
|
||||||
NumberReplicas numberReplicas= bm.countNodes(blockInfo);
|
NumberReplicas numberReplicas= blockManager.countNodes(blockInfo);
|
||||||
out.println("Block Id: " + blockId);
|
out.println("Block Id: " + blockId);
|
||||||
out.println("Block belongs to: "+iNode.getFullPathName());
|
out.println("Block belongs to: "+iNode.getFullPathName());
|
||||||
out.println("No. of Expected Replica: " +
|
out.println("No. of Expected Replica: " +
|
||||||
bm.getExpectedReplicaNum(blockInfo));
|
blockManager.getExpectedReplicaNum(blockInfo));
|
||||||
out.println("No. of live Replica: " + numberReplicas.liveReplicas());
|
out.println("No. of live Replica: " + numberReplicas.liveReplicas());
|
||||||
out.println("No. of excess Replica: " + numberReplicas.excessReplicas());
|
out.println("No. of excess Replica: " + numberReplicas.excessReplicas());
|
||||||
out.println("No. of stale Replica: " +
|
out.println("No. of stale Replica: " +
|
||||||
@ -279,8 +280,8 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
|||||||
numberReplicas.corruptReplicas());
|
numberReplicas.corruptReplicas());
|
||||||
//record datanodes that have corrupted block replica
|
//record datanodes that have corrupted block replica
|
||||||
Collection<DatanodeDescriptor> corruptionRecord = null;
|
Collection<DatanodeDescriptor> corruptionRecord = null;
|
||||||
if (bm.getCorruptReplicas(block) != null) {
|
if (blockManager.getCorruptReplicas(block) != null) {
|
||||||
corruptionRecord = bm.getCorruptReplicas(block);
|
corruptionRecord = blockManager.getCorruptReplicas(block);
|
||||||
}
|
}
|
||||||
|
|
||||||
//report block replicas status on datanodes
|
//report block replicas status on datanodes
|
||||||
@ -290,7 +291,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
|||||||
dn.getNetworkLocation() + " ");
|
dn.getNetworkLocation() + " ");
|
||||||
if (corruptionRecord != null && corruptionRecord.contains(dn)) {
|
if (corruptionRecord != null && corruptionRecord.contains(dn)) {
|
||||||
out.print(CORRUPT_STATUS + "\t ReasonCode: " +
|
out.print(CORRUPT_STATUS + "\t ReasonCode: " +
|
||||||
bm.getCorruptReason(block,dn));
|
blockManager.getCorruptReason(block, dn));
|
||||||
} else if (dn.isDecommissioned() ){
|
} else if (dn.isDecommissioned() ){
|
||||||
out.print(DECOMMISSIONED_STATUS);
|
out.print(DECOMMISSIONED_STATUS);
|
||||||
} else if (dn.isDecommissionInProgress()) {
|
} else if (dn.isDecommissionInProgress()) {
|
||||||
@ -546,8 +547,63 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void collectBlocksSummary(String parent, HdfsFileStatus file, Result res,
|
/**
|
||||||
LocatedBlocks blocks) throws IOException {
|
* Display info of each replica for replication block.
|
||||||
|
* For striped block group, display info of each internal block.
|
||||||
|
*/
|
||||||
|
private String getReplicaInfo(BlockInfo storedBlock) {
|
||||||
|
if (!(showLocations || showRacks || showReplicaDetails)) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
final boolean isComplete = storedBlock.isComplete();
|
||||||
|
DatanodeStorageInfo[] storages = isComplete ?
|
||||||
|
blockManager.getStorages(storedBlock) :
|
||||||
|
storedBlock.getUnderConstructionFeature().getExpectedStorageLocations();
|
||||||
|
StringBuilder sb = new StringBuilder(" [");
|
||||||
|
|
||||||
|
for (int i = 0; i < storages.length; i++) {
|
||||||
|
DatanodeStorageInfo storage = storages[i];
|
||||||
|
DatanodeDescriptor dnDesc = storage.getDatanodeDescriptor();
|
||||||
|
if (showRacks) {
|
||||||
|
sb.append(NodeBase.getPath(dnDesc));
|
||||||
|
} else {
|
||||||
|
sb.append(new DatanodeInfoWithStorage(dnDesc, storage.getStorageID(),
|
||||||
|
storage.getStorageType()));
|
||||||
|
}
|
||||||
|
if (showReplicaDetails) {
|
||||||
|
LightWeightHashSet<BlockInfo> blocksExcess =
|
||||||
|
blockManager.excessReplicateMap.get(dnDesc.getDatanodeUuid());
|
||||||
|
Collection<DatanodeDescriptor> corruptReplicas =
|
||||||
|
blockManager.getCorruptReplicas(storedBlock);
|
||||||
|
sb.append("(");
|
||||||
|
if (dnDesc.isDecommissioned()) {
|
||||||
|
sb.append("DECOMMISSIONED)");
|
||||||
|
} else if (dnDesc.isDecommissionInProgress()) {
|
||||||
|
sb.append("DECOMMISSIONING)");
|
||||||
|
} else if (corruptReplicas != null
|
||||||
|
&& corruptReplicas.contains(dnDesc)) {
|
||||||
|
sb.append("CORRUPT)");
|
||||||
|
} else if (blocksExcess != null
|
||||||
|
&& blocksExcess.contains(storedBlock)) {
|
||||||
|
sb.append("EXCESS)");
|
||||||
|
} else if (dnDesc.isStale(this.staleInterval)) {
|
||||||
|
sb.append("STALE_NODE)");
|
||||||
|
} else if (storage.areBlockContentsStale()) {
|
||||||
|
sb.append("STALE_BLOCK_CONTENT)");
|
||||||
|
} else {
|
||||||
|
sb.append("LIVE)");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (i < storages.length - 1) {
|
||||||
|
sb.append(", ");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sb.append(']');
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void collectBlocksSummary(String parent, HdfsFileStatus file,
|
||||||
|
Result res, LocatedBlocks blocks) throws IOException {
|
||||||
String path = file.getFullName(parent);
|
String path = file.getFullName(parent);
|
||||||
boolean isOpen = blocks.isUnderConstruction();
|
boolean isOpen = blocks.isUnderConstruction();
|
||||||
if (isOpen && !showOpenFiles) {
|
if (isOpen && !showOpenFiles) {
|
||||||
@ -570,13 +626,12 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
|||||||
// it is under construction
|
// it is under construction
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
BlockManager bm = namenode.getNamesystem().getBlockManager();
|
|
||||||
|
|
||||||
final BlockInfo storedBlock = bm.getStoredBlock(
|
final BlockInfo storedBlock = blockManager.getStoredBlock(
|
||||||
block.getLocalBlock());
|
block.getLocalBlock());
|
||||||
final int minReplication = bm.getMinStorageNum(storedBlock);
|
final int minReplication = blockManager.getMinStorageNum(storedBlock);
|
||||||
// count decommissionedReplicas / decommissioningReplicas
|
// count decommissionedReplicas / decommissioningReplicas
|
||||||
NumberReplicas numberReplicas = bm.countNodes(storedBlock);
|
NumberReplicas numberReplicas = blockManager.countNodes(storedBlock);
|
||||||
int decommissionedReplicas = numberReplicas.decommissioned();
|
int decommissionedReplicas = numberReplicas.decommissioned();
|
||||||
int decommissioningReplicas = numberReplicas.decommissioning();
|
int decommissioningReplicas = numberReplicas.decommissioning();
|
||||||
res.decommissionedReplicas += decommissionedReplicas;
|
res.decommissionedReplicas += decommissionedReplicas;
|
||||||
@ -663,7 +718,8 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
|||||||
|
|
||||||
// report
|
// report
|
||||||
String blkName = block.toString();
|
String blkName = block.toString();
|
||||||
report.append(blockNumber + ". " + blkName + " len=" + block.getNumBytes());
|
report.append(blockNumber + ". " + blkName + " len=" +
|
||||||
|
block.getNumBytes());
|
||||||
if (totalReplicasPerBlock == 0 && !isCorrupt) {
|
if (totalReplicasPerBlock == 0 && !isCorrupt) {
|
||||||
// If the block is corrupted, it means all its available replicas are
|
// If the block is corrupted, it means all its available replicas are
|
||||||
// corrupted. We don't mark it as missing given these available replicas
|
// corrupted. We don't mark it as missing given these available replicas
|
||||||
@ -675,52 +731,34 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
|||||||
missize += block.getNumBytes();
|
missize += block.getNumBytes();
|
||||||
} else {
|
} else {
|
||||||
report.append(" Live_repl=" + liveReplicas);
|
report.append(" Live_repl=" + liveReplicas);
|
||||||
if (showLocations || showRacks || showReplicaDetails) {
|
String info = getReplicaInfo(storedBlock);
|
||||||
StringBuilder sb = new StringBuilder("[");
|
if (!info.isEmpty()){
|
||||||
DatanodeStorageInfo[] storages = bm.getStorages(storedBlock);
|
report.append(" ").append(info);
|
||||||
for (int i = 0; i < storages.length; i++) {
|
|
||||||
DatanodeStorageInfo storage = storages[i];
|
|
||||||
DatanodeDescriptor dnDesc = storage.getDatanodeDescriptor();
|
|
||||||
if (showRacks) {
|
|
||||||
sb.append(NodeBase.getPath(dnDesc));
|
|
||||||
} else {
|
|
||||||
sb.append(new DatanodeInfoWithStorage(dnDesc, storage.getStorageID(), storage
|
|
||||||
.getStorageType()));
|
|
||||||
}
|
|
||||||
if (showReplicaDetails) {
|
|
||||||
LightWeightHashSet<BlockInfo> blocksExcess =
|
|
||||||
bm.excessReplicateMap.get(dnDesc.getDatanodeUuid());
|
|
||||||
Collection<DatanodeDescriptor> corruptReplicas =
|
|
||||||
bm.getCorruptReplicas(block.getLocalBlock());
|
|
||||||
sb.append("(");
|
|
||||||
if (dnDesc.isDecommissioned()) {
|
|
||||||
sb.append("DECOMMISSIONED)");
|
|
||||||
} else if (dnDesc.isDecommissionInProgress()) {
|
|
||||||
sb.append("DECOMMISSIONING)");
|
|
||||||
} else if (corruptReplicas != null && corruptReplicas.contains(dnDesc)) {
|
|
||||||
sb.append("CORRUPT)");
|
|
||||||
} else if (blocksExcess != null && blocksExcess.contains(block.getLocalBlock())) {
|
|
||||||
sb.append("EXCESS)");
|
|
||||||
} else if (dnDesc.isStale(this.staleInterval)) {
|
|
||||||
sb.append("STALE_NODE)");
|
|
||||||
} else if (storage.areBlockContentsStale()) {
|
|
||||||
sb.append("STALE_BLOCK_CONTENT)");
|
|
||||||
} else {
|
|
||||||
sb.append("LIVE)");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (i < storages.length - 1) {
|
|
||||||
sb.append(", ");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
sb.append(']');
|
|
||||||
report.append(" " + sb.toString());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
report.append('\n');
|
report.append('\n');
|
||||||
blockNumber++;
|
blockNumber++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//display under construction block info.
|
||||||
|
if (!blocks.isLastBlockComplete() && lastBlock != null) {
|
||||||
|
ExtendedBlock block = lastBlock.getBlock();
|
||||||
|
String blkName = block.toString();
|
||||||
|
BlockInfo storedBlock = blockManager.getStoredBlock(
|
||||||
|
block.getLocalBlock());
|
||||||
|
DatanodeStorageInfo[] storages = storedBlock
|
||||||
|
.getUnderConstructionFeature().getExpectedStorageLocations();
|
||||||
|
report.append('\n');
|
||||||
|
report.append("Under Construction Block:\n");
|
||||||
|
report.append(blockNumber).append(". ").append(blkName);
|
||||||
|
report.append(" len=").append(block.getNumBytes());
|
||||||
|
report.append(" Expected_repl=" + storages.length);
|
||||||
|
String info=getReplicaInfo(storedBlock);
|
||||||
|
if (!info.isEmpty()){
|
||||||
|
report.append(" ").append(info);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// count corrupt file & move or delete if necessary
|
// count corrupt file & move or delete if necessary
|
||||||
if ((missing > 0) || (corrupt > 0)) {
|
if ((missing > 0) || (corrupt > 0)) {
|
||||||
if (!showFiles) {
|
if (!showFiles) {
|
||||||
|
@ -625,9 +625,11 @@ public class TestFsck {
|
|||||||
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
|
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
|
||||||
assertFalse(outStr.contains("OPENFORWRITE"));
|
assertFalse(outStr.contains("OPENFORWRITE"));
|
||||||
// Use -openforwrite option to list open files
|
// Use -openforwrite option to list open files
|
||||||
outStr = runFsck(conf, 0, true, topDir, "-openforwrite");
|
outStr = runFsck(conf, 0, true, topDir, "-files", "-blocks",
|
||||||
|
"-locations", "-openforwrite");
|
||||||
System.out.println(outStr);
|
System.out.println(outStr);
|
||||||
assertTrue(outStr.contains("OPENFORWRITE"));
|
assertTrue(outStr.contains("OPENFORWRITE"));
|
||||||
|
assertTrue(outStr.contains("Under Construction Block:"));
|
||||||
assertTrue(outStr.contains("openFile"));
|
assertTrue(outStr.contains("openFile"));
|
||||||
// Close the file
|
// Close the file
|
||||||
out.close();
|
out.close();
|
||||||
@ -636,6 +638,7 @@ public class TestFsck {
|
|||||||
System.out.println(outStr);
|
System.out.println(outStr);
|
||||||
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
|
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
|
||||||
assertFalse(outStr.contains("OPENFORWRITE"));
|
assertFalse(outStr.contains("OPENFORWRITE"));
|
||||||
|
assertFalse(outStr.contains("Under Construction Block:"));
|
||||||
util.cleanup(fs, topDir);
|
util.cleanup(fs, topDir);
|
||||||
if (fs != null) {try{fs.close();} catch(Exception e){}}
|
if (fs != null) {try{fs.close();} catch(Exception e){}}
|
||||||
cluster.shutdown();
|
cluster.shutdown();
|
||||||
@ -645,6 +648,77 @@ public class TestFsck {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFsckOpenECFiles() throws Exception {
|
||||||
|
DFSTestUtil util = new DFSTestUtil.Builder().setName("TestFsckECFile").
|
||||||
|
setNumFiles(4).build();
|
||||||
|
MiniDFSCluster cluster = null;
|
||||||
|
FileSystem fs = null;
|
||||||
|
String outStr;
|
||||||
|
try {
|
||||||
|
Configuration conf = new HdfsConfiguration();
|
||||||
|
conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 10000L);
|
||||||
|
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(10).build();
|
||||||
|
cluster.getFileSystem().getClient().setErasureCodingPolicy("/", null);
|
||||||
|
String topDir = "/myDir";
|
||||||
|
byte[] randomBytes = new byte[3000000];
|
||||||
|
int seed = 42;
|
||||||
|
new Random(seed).nextBytes(randomBytes);
|
||||||
|
cluster.waitActive();
|
||||||
|
fs = cluster.getFileSystem();
|
||||||
|
util.createFiles(fs, topDir);
|
||||||
|
|
||||||
|
// Open a EC file for writing and do not close for now
|
||||||
|
Path openFile = new Path(topDir + "/openECFile");
|
||||||
|
FSDataOutputStream out = fs.create(openFile);
|
||||||
|
int writeCount = 0;
|
||||||
|
while (writeCount != 300) {
|
||||||
|
out.write(randomBytes);
|
||||||
|
writeCount++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We expect the filesystem to be HEALTHY and show one open file
|
||||||
|
outStr = runFsck(conf, 0, true, openFile.toString(), "-files",
|
||||||
|
"-blocks", "-openforwrite");
|
||||||
|
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
|
||||||
|
assertTrue(outStr.contains("OPENFORWRITE"));
|
||||||
|
assertTrue(outStr.contains("Live_repl=9"));
|
||||||
|
assertTrue(outStr.contains("Expected_repl=9"));
|
||||||
|
|
||||||
|
// Use -openforwrite option to list open files
|
||||||
|
outStr = runFsck(conf, 0, true, openFile.toString(), "-files", "-blocks",
|
||||||
|
"-locations", "-openforwrite", "-replicaDetails");
|
||||||
|
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
|
||||||
|
assertTrue(outStr.contains("OPENFORWRITE"));
|
||||||
|
assertTrue(outStr.contains("Live_repl=9"));
|
||||||
|
assertTrue(outStr.contains("Expected_repl=9"));
|
||||||
|
assertTrue(outStr.contains("Under Construction Block:"));
|
||||||
|
|
||||||
|
// Close the file
|
||||||
|
out.close();
|
||||||
|
|
||||||
|
// Now, fsck should show HEALTHY fs and should not show any open files
|
||||||
|
outStr = runFsck(conf, 0, true, openFile.toString(), "-files", "-blocks",
|
||||||
|
"-locations", "-racks", "-replicaDetails");
|
||||||
|
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
|
||||||
|
assertFalse(outStr.contains("OPENFORWRITE"));
|
||||||
|
assertFalse(outStr.contains("Under Construction Block:"));
|
||||||
|
assertFalse(outStr.contains("Expected_repl=9"));
|
||||||
|
assertTrue(outStr.contains("Live_repl=9"));
|
||||||
|
util.cleanup(fs, topDir);
|
||||||
|
} finally {
|
||||||
|
if (fs != null) {
|
||||||
|
try {
|
||||||
|
fs.close();
|
||||||
|
} catch (Exception e) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (cluster != null) {
|
||||||
|
cluster.shutdown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCorruptBlock() throws Exception {
|
public void testCorruptBlock() throws Exception {
|
||||||
Configuration conf = new HdfsConfiguration();
|
Configuration conf = new HdfsConfiguration();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user