HDFS-17033. Update fsck to display stale state info of blocks accurately. (#5815). Contributed by Wang Yuanben.

Reviewed-by: Shilun Fan <slfan1989@apache.org>
Reviewed-by: Xing Lin <linxingnku@gmail.com>
Reviewed-by: Hualong Zhang <hualong.z@hotmail.com>
Signed-off-by: He Xiaoqiao <hexiaoqiao@apache.org>
This commit is contained in:
WangYuanben 2023-07-10 18:42:37 +08:00 committed by GitHub
parent a84284e974
commit 37b2d36edd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 91 additions and 0 deletions

View File

@ -126,6 +126,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
public static final String ENTERING_MAINTENANCE_STATUS =
"is ENTERING MAINTENANCE";
public static final String IN_MAINTENANCE_STATUS = "is IN MAINTENANCE";
public static final String STALE_STATUS = "is STALE";
public static final String NONEXISTENT_STATUS = "does not exist";
public static final String FAILURE_STATUS = "FAILED";
public static final String UNDEFINED = "undefined";
@ -370,6 +371,8 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
out.print(ENTERING_MAINTENANCE_STATUS);
} else if (this.showMaintenanceState && dn.isInMaintenance()) {
out.print(IN_MAINTENANCE_STATUS);
} else if (dn.isStale(this.staleInterval)) {
out.print(STALE_STATUS);
} else {
out.print(HEALTHY_STATUS);
}

View File

@ -404,6 +404,8 @@ public class DFSck extends Configured implements Tool {
errCode = 4;
} else if (lastLine.endsWith(NamenodeFsck.ENTERING_MAINTENANCE_STATUS)) {
errCode = 5;
} else if (lastLine.endsWith(NamenodeFsck.STALE_STATUS)) {
errCode = 6;
}
return errCode;
}

View File

@ -19,6 +19,7 @@
package org.apache.hadoop.hdfs.server.namenode;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CORRUPT_BLOCK_DELETE_IMMEDIATELY_ENABLED;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY;
import static org.apache.hadoop.hdfs.MiniDFSCluster.HDFS_MINIDFS_BASEDIR;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@ -1681,6 +1682,91 @@ public class TestFsck {
assertFalse(fsckOut.contains(NamenodeFsck.IN_MAINTENANCE_STATUS));
}
/**
* Test for blockIdCK with datanode staleness.
*/
@Test
public void testBlockIdCKStaleness() throws Exception {
final short replFactor = 1;
final long blockSize = 512;
Configuration configuration = new Configuration();
// Shorten dfs.namenode.stale.datanode.interval for easier testing.
configuration.setLong(DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY, 5000);
configuration.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
configuration.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, replFactor);
String[] racks = {"/rack1", "/rack2"};
String[] hosts = {"host1", "host2"};
File builderBaseDir = new File(GenericTestUtils.getRandomizedTempPath());
cluster = new MiniDFSCluster.Builder(configuration, builderBaseDir)
.hosts(hosts).racks(racks).build();
assertNotNull("Failed Cluster Creation", cluster);
cluster.waitClusterUp();
DistributedFileSystem fs = cluster.getFileSystem();
assertNotNull("Failed to get FileSystem", fs);
try {
DFSTestUtil util = new DFSTestUtil.Builder().
setName(getClass().getSimpleName()).setNumFiles(1).build();
// Create one file.
final String pathString = new String("/testfile");
final Path path = new Path(pathString);
util.createFile(fs, path, 1024L, replFactor, 1024L);
util.waitReplication(fs, path, replFactor);
StringBuilder sb = new StringBuilder();
for (LocatedBlock lb: util.getAllBlocks(fs, path)) {
sb.append(lb.getBlock().getLocalBlock().getBlockName() + " ");
}
String[] bIds = sb.toString().split(" ");
// Make sure datanode is HEALTHY before down.
String outStr = runFsck(configuration, 0, true, "/", "-blockId", bIds[0]);
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
FSNamesystem fsn = cluster.getNameNode().getNamesystem();
BlockManager bm = fsn.getBlockManager();
DatanodeManager dnm = bm.getDatanodeManager();
DatanodeDescriptor dn = dnm.getDatanode(cluster.getDataNodes().get(0)
.getDatanodeId());
final String dnName = dn.getXferAddr();
// Make the block on datanode enter stale state.
cluster.stopDataNode(0);
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
try {
DatanodeInfo datanodeInfo = null;
for (DatanodeInfo info : fs.getDataNodeStats()) {
if (dnName.equals(info.getXferAddr())) {
datanodeInfo = info;
}
}
if (datanodeInfo != null && datanodeInfo.isStale(5000)) {
return true;
}
} catch (Exception e) {
LOG.warn("Unexpected exception: " + e);
return false;
}
return false;
}
}, 500, 30000);
outStr = runFsck(configuration, 6, true, "/", "-blockId", bIds[0]);
assertTrue(outStr.contains(NamenodeFsck.STALE_STATUS));
} finally {
if (fs != null) {
fs.close();
}
if (cluster != null) {
cluster.shutdown();
}
}
}
/**
* Test for blockIdCK with block corruption.
*/