Revert "HDFS-11259. Update fsck to display maintenance state info. (Manoj Govindassamy via lei)"
This reverts commit c18590fce2
.
This commit is contained in:
parent
ed09c1418d
commit
2f8e9b7e4b
@ -117,9 +117,6 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
|||||||
public static final String HEALTHY_STATUS = "is HEALTHY";
|
public static final String HEALTHY_STATUS = "is HEALTHY";
|
||||||
public static final String DECOMMISSIONING_STATUS = "is DECOMMISSIONING";
|
public static final String DECOMMISSIONING_STATUS = "is DECOMMISSIONING";
|
||||||
public static final String DECOMMISSIONED_STATUS = "is DECOMMISSIONED";
|
public static final String DECOMMISSIONED_STATUS = "is DECOMMISSIONED";
|
||||||
public static final String ENTERING_MAINTENANCE_STATUS =
|
|
||||||
"is ENTERING MAINTENANCE";
|
|
||||||
public static final String IN_MAINTENANCE_STATUS = "is IN MAINTENANCE";
|
|
||||||
public static final String NONEXISTENT_STATUS = "does not exist";
|
public static final String NONEXISTENT_STATUS = "does not exist";
|
||||||
public static final String FAILURE_STATUS = "FAILED";
|
public static final String FAILURE_STATUS = "FAILED";
|
||||||
public static final String UNDEFINED = "undefined";
|
public static final String UNDEFINED = "undefined";
|
||||||
@ -283,10 +280,6 @@ public void blockIdCK(String blockId) {
|
|||||||
+ numberReplicas.decommissioned());
|
+ numberReplicas.decommissioned());
|
||||||
out.println("No. of decommissioning Replica: "
|
out.println("No. of decommissioning Replica: "
|
||||||
+ numberReplicas.decommissioning());
|
+ numberReplicas.decommissioning());
|
||||||
out.println("No. of entering maintenance Replica: "
|
|
||||||
+ numberReplicas.liveEnteringMaintenanceReplicas());
|
|
||||||
out.println("No. of in maintenance Replica: "
|
|
||||||
+ numberReplicas.maintenanceNotForReadReplicas());
|
|
||||||
out.println("No. of corrupted Replica: " +
|
out.println("No. of corrupted Replica: " +
|
||||||
numberReplicas.corruptReplicas());
|
numberReplicas.corruptReplicas());
|
||||||
//record datanodes that have corrupted block replica
|
//record datanodes that have corrupted block replica
|
||||||
@ -307,10 +300,6 @@ public void blockIdCK(String blockId) {
|
|||||||
out.print(DECOMMISSIONED_STATUS);
|
out.print(DECOMMISSIONED_STATUS);
|
||||||
} else if (dn.isDecommissionInProgress()) {
|
} else if (dn.isDecommissionInProgress()) {
|
||||||
out.print(DECOMMISSIONING_STATUS);
|
out.print(DECOMMISSIONING_STATUS);
|
||||||
} else if (dn.isEnteringMaintenance()) {
|
|
||||||
out.print(ENTERING_MAINTENANCE_STATUS);
|
|
||||||
} else if (dn.isInMaintenance()) {
|
|
||||||
out.print(IN_MAINTENANCE_STATUS);
|
|
||||||
} else {
|
} else {
|
||||||
out.print(HEALTHY_STATUS);
|
out.print(HEALTHY_STATUS);
|
||||||
}
|
}
|
||||||
@ -609,10 +598,6 @@ private String getReplicaInfo(BlockInfo storedBlock) {
|
|||||||
sb.append("DECOMMISSIONED)");
|
sb.append("DECOMMISSIONED)");
|
||||||
} else if (dnDesc.isDecommissionInProgress()) {
|
} else if (dnDesc.isDecommissionInProgress()) {
|
||||||
sb.append("DECOMMISSIONING)");
|
sb.append("DECOMMISSIONING)");
|
||||||
} else if (dnDesc.isEnteringMaintenance()) {
|
|
||||||
sb.append("ENTERING MAINTENANCE)");
|
|
||||||
} else if (dnDesc.isInMaintenance()) {
|
|
||||||
sb.append("IN MAINTENANCE)");
|
|
||||||
} else if (corruptReplicas != null
|
} else if (corruptReplicas != null
|
||||||
&& corruptReplicas.contains(dnDesc)) {
|
&& corruptReplicas.contains(dnDesc)) {
|
||||||
sb.append("CORRUPT)");
|
sb.append("CORRUPT)");
|
||||||
@ -635,7 +620,7 @@ private String getReplicaInfo(BlockInfo storedBlock) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private void collectBlocksSummary(String parent, HdfsFileStatus file,
|
private void collectBlocksSummary(String parent, HdfsFileStatus file,
|
||||||
Result res, LocatedBlocks blocks) throws IOException {
|
Result res, LocatedBlocks blocks) throws IOException {
|
||||||
String path = file.getFullName(parent);
|
String path = file.getFullName(parent);
|
||||||
boolean isOpen = blocks.isUnderConstruction();
|
boolean isOpen = blocks.isUnderConstruction();
|
||||||
if (isOpen && !showOpenFiles) {
|
if (isOpen && !showOpenFiles) {
|
||||||
@ -666,21 +651,13 @@ private void collectBlocksSummary(String parent, HdfsFileStatus file,
|
|||||||
NumberReplicas numberReplicas = blockManager.countNodes(storedBlock);
|
NumberReplicas numberReplicas = blockManager.countNodes(storedBlock);
|
||||||
int decommissionedReplicas = numberReplicas.decommissioned();
|
int decommissionedReplicas = numberReplicas.decommissioned();
|
||||||
int decommissioningReplicas = numberReplicas.decommissioning();
|
int decommissioningReplicas = numberReplicas.decommissioning();
|
||||||
int enteringMaintenanceReplicas =
|
|
||||||
numberReplicas.liveEnteringMaintenanceReplicas();
|
|
||||||
int inMaintenanceReplicas =
|
|
||||||
numberReplicas.maintenanceNotForReadReplicas();
|
|
||||||
res.decommissionedReplicas += decommissionedReplicas;
|
res.decommissionedReplicas += decommissionedReplicas;
|
||||||
res.decommissioningReplicas += decommissioningReplicas;
|
res.decommissioningReplicas += decommissioningReplicas;
|
||||||
res.enteringMaintenanceReplicas += enteringMaintenanceReplicas;
|
|
||||||
res.inMaintenanceReplicas += inMaintenanceReplicas;
|
|
||||||
|
|
||||||
// count total replicas
|
// count total replicas
|
||||||
int liveReplicas = numberReplicas.liveReplicas();
|
int liveReplicas = numberReplicas.liveReplicas();
|
||||||
int totalReplicasPerBlock = liveReplicas + decommissionedReplicas
|
int totalReplicasPerBlock = liveReplicas + decommissionedReplicas +
|
||||||
+ decommissioningReplicas
|
decommissioningReplicas;
|
||||||
+ enteringMaintenanceReplicas
|
|
||||||
+ inMaintenanceReplicas;
|
|
||||||
res.totalReplicas += totalReplicasPerBlock;
|
res.totalReplicas += totalReplicasPerBlock;
|
||||||
|
|
||||||
boolean isMissing;
|
boolean isMissing;
|
||||||
@ -734,14 +711,12 @@ private void collectBlocksSummary(String parent, HdfsFileStatus file,
|
|||||||
if (!showFiles) {
|
if (!showFiles) {
|
||||||
out.print("\n" + path + ": ");
|
out.print("\n" + path + ": ");
|
||||||
}
|
}
|
||||||
out.println(" Under replicated " + block + ". Target Replicas is "
|
out.println(" Under replicated " + block +
|
||||||
+ targetFileReplication + " but found "
|
". Target Replicas is " +
|
||||||
+ liveReplicas+ " live replica(s), "
|
targetFileReplication + " but found " +
|
||||||
+ decommissionedReplicas + " decommissioned replica(s), "
|
liveReplicas + " live replica(s), " +
|
||||||
+ decommissioningReplicas + " decommissioning replica(s), "
|
decommissionedReplicas + " decommissioned replica(s) and " +
|
||||||
+ enteringMaintenanceReplicas
|
decommissioningReplicas + " decommissioning replica(s).");
|
||||||
+ " entering maintenance replica(s) and "
|
|
||||||
+ inMaintenanceReplicas + " in maintenance replica(s).");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// count mis replicated blocks
|
// count mis replicated blocks
|
||||||
@ -1120,8 +1095,6 @@ static class Result {
|
|||||||
long missingReplicas = 0L;
|
long missingReplicas = 0L;
|
||||||
long decommissionedReplicas = 0L;
|
long decommissionedReplicas = 0L;
|
||||||
long decommissioningReplicas = 0L;
|
long decommissioningReplicas = 0L;
|
||||||
long enteringMaintenanceReplicas = 0L;
|
|
||||||
long inMaintenanceReplicas = 0L;
|
|
||||||
long numUnderMinReplicatedBlocks = 0L;
|
long numUnderMinReplicatedBlocks = 0L;
|
||||||
long numOverReplicatedBlocks = 0L;
|
long numOverReplicatedBlocks = 0L;
|
||||||
long numUnderReplicatedBlocks = 0L;
|
long numUnderReplicatedBlocks = 0L;
|
||||||
@ -1270,14 +1243,6 @@ public String toString() {
|
|||||||
res.append("\n DecommissioningReplicas:\t").append(
|
res.append("\n DecommissioningReplicas:\t").append(
|
||||||
decommissioningReplicas);
|
decommissioningReplicas);
|
||||||
}
|
}
|
||||||
if (enteringMaintenanceReplicas > 0) {
|
|
||||||
res.append("\n EnteringMaintenanceReplicas:\t").append(
|
|
||||||
enteringMaintenanceReplicas);
|
|
||||||
}
|
|
||||||
if (inMaintenanceReplicas > 0) {
|
|
||||||
res.append("\n InMaintenanceReplicas:\t").append(
|
|
||||||
inMaintenanceReplicas);
|
|
||||||
}
|
|
||||||
return res.toString();
|
return res.toString();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1384,14 +1349,6 @@ public String toString() {
|
|||||||
res.append("\n Decommissioning internal blocks:\t").append(
|
res.append("\n Decommissioning internal blocks:\t").append(
|
||||||
decommissioningReplicas);
|
decommissioningReplicas);
|
||||||
}
|
}
|
||||||
if (enteringMaintenanceReplicas > 0) {
|
|
||||||
res.append("\n EnteringMaintenanceReplicas:\t").append(
|
|
||||||
enteringMaintenanceReplicas);
|
|
||||||
}
|
|
||||||
if (inMaintenanceReplicas > 0) {
|
|
||||||
res.append("\n InMaintenanceReplicas:\t").append(
|
|
||||||
inMaintenanceReplicas);
|
|
||||||
}
|
|
||||||
return res.toString();
|
return res.toString();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -372,10 +372,6 @@ else if (args[idx].equals("-replicaDetails")) {
|
|||||||
errCode = 2;
|
errCode = 2;
|
||||||
} else if (lastLine.endsWith(NamenodeFsck.DECOMMISSIONING_STATUS)) {
|
} else if (lastLine.endsWith(NamenodeFsck.DECOMMISSIONING_STATUS)) {
|
||||||
errCode = 3;
|
errCode = 3;
|
||||||
} else if (lastLine.endsWith(NamenodeFsck.IN_MAINTENANCE_STATUS)) {
|
|
||||||
errCode = 4;
|
|
||||||
} else if (lastLine.endsWith(NamenodeFsck.ENTERING_MAINTENANCE_STATUS)) {
|
|
||||||
errCode = 5;
|
|
||||||
}
|
}
|
||||||
return errCode;
|
return errCode;
|
||||||
}
|
}
|
||||||
|
@ -54,7 +54,6 @@
|
|||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.concurrent.TimeoutException;
|
import java.util.concurrent.TimeoutException;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
@ -169,11 +168,11 @@ static String runFsck(Configuration conf, int expectedErrCode,
|
|||||||
PrintStream out = new PrintStream(bStream, true);
|
PrintStream out = new PrintStream(bStream, true);
|
||||||
GenericTestUtils.setLogLevel(FSPermissionChecker.LOG, Level.ALL);
|
GenericTestUtils.setLogLevel(FSPermissionChecker.LOG, Level.ALL);
|
||||||
int errCode = ToolRunner.run(new DFSck(conf, out), path);
|
int errCode = ToolRunner.run(new DFSck(conf, out), path);
|
||||||
LOG.info("OUTPUT = " + bStream.toString());
|
|
||||||
if (checkErrorCode) {
|
if (checkErrorCode) {
|
||||||
assertEquals(expectedErrCode, errCode);
|
assertEquals(expectedErrCode, errCode);
|
||||||
}
|
}
|
||||||
GenericTestUtils.setLogLevel(FSPermissionChecker.LOG, Level.INFO);
|
GenericTestUtils.setLogLevel(FSPermissionChecker.LOG, Level.INFO);
|
||||||
|
LOG.info("OUTPUT = " + bStream.toString());
|
||||||
return bStream.toString();
|
return bStream.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -879,13 +878,14 @@ public void testUnderMinReplicatedBlock() throws Exception {
|
|||||||
assertTrue(outStr.contains("dfs.namenode.replication.min:\t2"));
|
assertTrue(outStr.contains("dfs.namenode.replication.min:\t2"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout = 90000)
|
@Test(timeout = 60000)
|
||||||
public void testFsckReplicaDetails() throws Exception {
|
public void testFsckReplicaDetails() throws Exception {
|
||||||
|
|
||||||
final short replFactor = 1;
|
final short replFactor = 1;
|
||||||
short numDn = 1;
|
short numDn = 1;
|
||||||
final long blockSize = 512;
|
final long blockSize = 512;
|
||||||
final long fileSize = 1024;
|
final long fileSize = 1024;
|
||||||
|
boolean checkDecommissionInProgress = false;
|
||||||
String[] racks = {"/rack1"};
|
String[] racks = {"/rack1"};
|
||||||
String[] hosts = {"host1"};
|
String[] hosts = {"host1"};
|
||||||
|
|
||||||
@ -910,110 +910,49 @@ public void testFsckReplicaDetails() throws Exception {
|
|||||||
"-replicaDetails");
|
"-replicaDetails");
|
||||||
assertTrue(fsckOut.contains(NamenodeFsck.HEALTHY_STATUS));
|
assertTrue(fsckOut.contains(NamenodeFsck.HEALTHY_STATUS));
|
||||||
assertTrue(fsckOut.contains("(LIVE)"));
|
assertTrue(fsckOut.contains("(LIVE)"));
|
||||||
assertTrue(!fsckOut.contains("(ENTERING MAINTENANCE)"));
|
|
||||||
assertTrue(!fsckOut.contains("(IN MAINTENANCE)"));
|
|
||||||
|
|
||||||
// decommission datanode
|
// decommission datanode
|
||||||
|
ExtendedBlock eb = DFSTestUtil.getFirstBlock(dfs, path);
|
||||||
FSNamesystem fsn = cluster.getNameNode().getNamesystem();
|
FSNamesystem fsn = cluster.getNameNode().getNamesystem();
|
||||||
BlockManager bm = fsn.getBlockManager();
|
BlockManager bm = fsn.getBlockManager();
|
||||||
final DatanodeManager dnm = bm.getDatanodeManager();
|
BlockCollection bc = null;
|
||||||
DatanodeDescriptor dnDesc0 = dnm.getDatanode(
|
try {
|
||||||
cluster.getDataNodes().get(0).getDatanodeId());
|
fsn.writeLock();
|
||||||
|
BlockInfo bi = bm.getStoredBlock(eb.getLocalBlock());
|
||||||
bm.getDatanodeManager().getDecomManager().startDecommission(dnDesc0);
|
bc = fsn.getBlockCollection(bi);
|
||||||
final String dn0Name = dnDesc0.getXferAddr();
|
} finally {
|
||||||
|
fsn.writeUnlock();
|
||||||
|
}
|
||||||
|
DatanodeDescriptor dn = bc.getBlocks()[0]
|
||||||
|
.getDatanode(0);
|
||||||
|
bm.getDatanodeManager().getDecomManager().startDecommission(dn);
|
||||||
|
String dnName = dn.getXferAddr();
|
||||||
|
|
||||||
// check the replica status while decommissioning
|
// check the replica status while decommissioning
|
||||||
fsckOut = runFsck(conf, 0, true, testFile, "-files", "-blocks",
|
fsckOut = runFsck(conf, 0, true, testFile, "-files", "-blocks",
|
||||||
"-replicaDetails");
|
"-replicaDetails");
|
||||||
assertTrue(fsckOut.contains("(DECOMMISSIONING)"));
|
assertTrue(fsckOut.contains("(DECOMMISSIONING)"));
|
||||||
assertTrue(!fsckOut.contains("(ENTERING MAINTENANCE)"));
|
|
||||||
assertTrue(!fsckOut.contains("(IN MAINTENANCE)"));
|
|
||||||
|
|
||||||
// Start 2nd DataNode
|
// Start 2nd Datanode and wait for decommission to start
|
||||||
cluster.startDataNodes(conf, 1, true, null,
|
cluster.startDataNodes(conf, 1, true, null, null, null);
|
||||||
new String[] {"/rack2"}, new String[] {"host2"}, null, false);
|
DatanodeInfo datanodeInfo = null;
|
||||||
|
do {
|
||||||
// Wait for decommission to start
|
Thread.sleep(2000);
|
||||||
final AtomicBoolean checkDecommissionInProgress =
|
for (DatanodeInfo info : dfs.getDataNodeStats()) {
|
||||||
new AtomicBoolean(false);
|
if (dnName.equals(info.getXferAddr())) {
|
||||||
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
datanodeInfo = info;
|
||||||
@Override
|
|
||||||
public Boolean get() {
|
|
||||||
DatanodeInfo datanodeInfo = null;
|
|
||||||
try {
|
|
||||||
for (DatanodeInfo info : dfs.getDataNodeStats()) {
|
|
||||||
if (dn0Name.equals(info.getXferAddr())) {
|
|
||||||
datanodeInfo = info;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!checkDecommissionInProgress.get() && datanodeInfo != null
|
|
||||||
&& datanodeInfo.isDecommissionInProgress()) {
|
|
||||||
checkDecommissionInProgress.set(true);
|
|
||||||
}
|
|
||||||
if (datanodeInfo != null && datanodeInfo.isDecommissioned()) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.warn("Unexpected exception: " + e);
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
}, 500, 30000);
|
if (!checkDecommissionInProgress && datanodeInfo != null
|
||||||
|
&& datanodeInfo.isDecommissionInProgress()) {
|
||||||
|
checkDecommissionInProgress = true;
|
||||||
|
}
|
||||||
|
} while (datanodeInfo != null && !datanodeInfo.isDecommissioned());
|
||||||
|
|
||||||
// check the replica status after decommission is done
|
// check the replica status after decommission is done
|
||||||
fsckOut = runFsck(conf, 0, true, testFile, "-files", "-blocks",
|
fsckOut = runFsck(conf, 0, true, testFile, "-files", "-blocks",
|
||||||
"-replicaDetails");
|
"-replicaDetails");
|
||||||
assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
|
assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
|
||||||
assertTrue(!fsckOut.contains("(ENTERING MAINTENANCE)"));
|
|
||||||
assertTrue(!fsckOut.contains("(IN MAINTENANCE)"));
|
|
||||||
|
|
||||||
DatanodeDescriptor dnDesc1 = dnm.getDatanode(
|
|
||||||
cluster.getDataNodes().get(1).getDatanodeId());
|
|
||||||
final String dn1Name = dnDesc1.getXferAddr();
|
|
||||||
|
|
||||||
bm.getDatanodeManager().getDecomManager().startMaintenance(dnDesc1,
|
|
||||||
Long.MAX_VALUE);
|
|
||||||
// check the replica status while entering maintenance
|
|
||||||
fsckOut = runFsck(conf, 0, true, testFile, "-files", "-blocks",
|
|
||||||
"-replicaDetails");
|
|
||||||
assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
|
|
||||||
assertTrue(fsckOut.contains("(ENTERING MAINTENANCE)"));
|
|
||||||
assertTrue(!fsckOut.contains("(IN MAINTENANCE)"));
|
|
||||||
|
|
||||||
// Start 3rd DataNode
|
|
||||||
cluster.startDataNodes(conf, 1, true, null,
|
|
||||||
new String[] {"/rack3"}, new String[] {"host3"}, null, false);
|
|
||||||
|
|
||||||
// Wait for the 2nd node to reach in maintenance state
|
|
||||||
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
|
||||||
@Override
|
|
||||||
public Boolean get() {
|
|
||||||
DatanodeInfo dnInfo = null;
|
|
||||||
try {
|
|
||||||
for (DatanodeInfo info : dfs.getDataNodeStats()) {
|
|
||||||
if (dn1Name.equals(info.getXferAddr())) {
|
|
||||||
dnInfo = info;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (dnInfo != null && dnInfo.isInMaintenance()) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.warn("Unexpected exception: " + e);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}, 500, 30000);
|
|
||||||
|
|
||||||
// check the replica status after decommission is done
|
|
||||||
fsckOut = runFsck(conf, 0, true, testFile, "-files", "-blocks",
|
|
||||||
"-replicaDetails");
|
|
||||||
assertTrue(fsckOut.contains("(DECOMMISSIONED)"));
|
|
||||||
assertTrue(!fsckOut.contains("(ENTERING MAINTENANCE)"));
|
|
||||||
assertTrue(fsckOut.contains("(IN MAINTENANCE)"));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Test if fsck can return -1 in case of failure.
|
/** Test if fsck can return -1 in case of failure.
|
||||||
@ -1520,118 +1459,6 @@ public void testBlockIdCKDecommission() throws Exception {
|
|||||||
assertTrue(fsckOut.contains(NamenodeFsck.DECOMMISSIONED_STATUS));
|
assertTrue(fsckOut.contains(NamenodeFsck.DECOMMISSIONED_STATUS));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Test for blockIdCK with datanode maintenance.
|
|
||||||
*/
|
|
||||||
@Test (timeout = 90000)
|
|
||||||
public void testBlockIdCKMaintenance() throws Exception {
|
|
||||||
final short replFactor = 2;
|
|
||||||
short numDn = 2;
|
|
||||||
final long blockSize = 512;
|
|
||||||
String[] hosts = {"host1", "host2"};
|
|
||||||
String[] racks = {"/rack1", "/rack2"};
|
|
||||||
|
|
||||||
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
|
|
||||||
conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, replFactor);
|
|
||||||
conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY, replFactor);
|
|
||||||
conf.setInt(DFSConfigKeys.DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY,
|
|
||||||
replFactor);
|
|
||||||
|
|
||||||
DistributedFileSystem dfs;
|
|
||||||
cluster = new MiniDFSCluster.Builder(conf)
|
|
||||||
.numDataNodes(numDn)
|
|
||||||
.hosts(hosts)
|
|
||||||
.racks(racks)
|
|
||||||
.build();
|
|
||||||
|
|
||||||
assertNotNull("Failed Cluster Creation", cluster);
|
|
||||||
cluster.waitClusterUp();
|
|
||||||
dfs = cluster.getFileSystem();
|
|
||||||
assertNotNull("Failed to get FileSystem", dfs);
|
|
||||||
|
|
||||||
DFSTestUtil util = new DFSTestUtil.Builder().
|
|
||||||
setName(getClass().getSimpleName()).setNumFiles(1).build();
|
|
||||||
//create files
|
|
||||||
final String pathString = new String("/testfile");
|
|
||||||
final Path path = new Path(pathString);
|
|
||||||
util.createFile(dfs, path, 1024, replFactor, 1000L);
|
|
||||||
util.waitReplication(dfs, path, replFactor);
|
|
||||||
StringBuilder sb = new StringBuilder();
|
|
||||||
for (LocatedBlock lb: util.getAllBlocks(dfs, path)){
|
|
||||||
sb.append(lb.getBlock().getLocalBlock().getBlockName()+" ");
|
|
||||||
}
|
|
||||||
String[] bIds = sb.toString().split(" ");
|
|
||||||
|
|
||||||
//make sure datanode that has replica is fine before maintenance
|
|
||||||
String outStr = runFsck(conf, 0, true, "/", "-blockId", bIds[0]);
|
|
||||||
System.out.println(outStr);
|
|
||||||
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
|
|
||||||
|
|
||||||
FSNamesystem fsn = cluster.getNameNode().getNamesystem();
|
|
||||||
BlockManager bm = fsn.getBlockManager();
|
|
||||||
DatanodeManager dnm = bm.getDatanodeManager();
|
|
||||||
DatanodeDescriptor dn = dnm.getDatanode(cluster.getDataNodes().get(0)
|
|
||||||
.getDatanodeId());
|
|
||||||
bm.getDatanodeManager().getDecomManager().startMaintenance(dn,
|
|
||||||
Long.MAX_VALUE);
|
|
||||||
final String dnName = dn.getXferAddr();
|
|
||||||
|
|
||||||
//wait for the node to enter maintenance state
|
|
||||||
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
|
||||||
@Override
|
|
||||||
public Boolean get() {
|
|
||||||
DatanodeInfo datanodeInfo = null;
|
|
||||||
try {
|
|
||||||
for (DatanodeInfo info : dfs.getDataNodeStats()) {
|
|
||||||
if (dnName.equals(info.getXferAddr())) {
|
|
||||||
datanodeInfo = info;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (datanodeInfo != null && datanodeInfo.isEnteringMaintenance()) {
|
|
||||||
String fsckOut = runFsck(conf, 5, false, "/", "-blockId", bIds[0]);
|
|
||||||
assertTrue(fsckOut.contains(
|
|
||||||
NamenodeFsck.ENTERING_MAINTENANCE_STATUS));
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.warn("Unexpected exception: " + e);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}, 500, 30000);
|
|
||||||
|
|
||||||
// Start 3rd DataNode
|
|
||||||
cluster.startDataNodes(conf, 1, true, null,
|
|
||||||
new String[] {"/rack3"}, new String[] {"host3"}, null, false);
|
|
||||||
|
|
||||||
// Wait for 1st node to reach in maintenance state
|
|
||||||
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
|
||||||
@Override
|
|
||||||
public Boolean get() {
|
|
||||||
try {
|
|
||||||
DatanodeInfo datanodeInfo = null;
|
|
||||||
for (DatanodeInfo info : dfs.getDataNodeStats()) {
|
|
||||||
if (dnName.equals(info.getXferAddr())) {
|
|
||||||
datanodeInfo = info;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (datanodeInfo != null && datanodeInfo.isInMaintenance()) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.warn("Unexpected exception: " + e);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}, 500, 30000);
|
|
||||||
|
|
||||||
//check in maintenance node
|
|
||||||
String fsckOut = runFsck(conf, 4, false, "/", "-blockId", bIds[0]);
|
|
||||||
assertTrue(fsckOut.contains(NamenodeFsck.IN_MAINTENANCE_STATUS));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test for blockIdCK with block corruption.
|
* Test for blockIdCK with block corruption.
|
||||||
*/
|
*/
|
||||||
@ -1828,117 +1655,6 @@ public void testFsckWithDecommissionedReplicas() throws Exception {
|
|||||||
String fsckOut = runFsck(conf, 0, true, testFile);
|
String fsckOut = runFsck(conf, 0, true, testFile);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Test for blocks on maintenance hosts are not shown as missing.
|
|
||||||
*/
|
|
||||||
@Test (timeout = 90000)
|
|
||||||
public void testFsckWithMaintenanceReplicas() throws Exception {
|
|
||||||
final short replFactor = 2;
|
|
||||||
short numDn = 2;
|
|
||||||
final long blockSize = 512;
|
|
||||||
String[] hosts = {"host1", "host2"};
|
|
||||||
String[] racks = {"/rack1", "/rack2"};
|
|
||||||
|
|
||||||
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
|
|
||||||
conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, replFactor);
|
|
||||||
conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY, replFactor);
|
|
||||||
conf.setInt(DFSConfigKeys.DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY,
|
|
||||||
replFactor);
|
|
||||||
|
|
||||||
DistributedFileSystem dfs;
|
|
||||||
cluster = new MiniDFSCluster.Builder(conf)
|
|
||||||
.numDataNodes(numDn)
|
|
||||||
.hosts(hosts)
|
|
||||||
.racks(racks)
|
|
||||||
.build();
|
|
||||||
|
|
||||||
assertNotNull("Failed Cluster Creation", cluster);
|
|
||||||
cluster.waitClusterUp();
|
|
||||||
dfs = cluster.getFileSystem();
|
|
||||||
assertNotNull("Failed to get FileSystem", dfs);
|
|
||||||
|
|
||||||
DFSTestUtil util = new DFSTestUtil.Builder().
|
|
||||||
setName(getClass().getSimpleName()).setNumFiles(1).build();
|
|
||||||
//create files
|
|
||||||
final String testFile = new String("/testfile");
|
|
||||||
final Path path = new Path(testFile);
|
|
||||||
util.createFile(dfs, path, 1024, replFactor, 1000L);
|
|
||||||
util.waitReplication(dfs, path, replFactor);
|
|
||||||
StringBuilder sb = new StringBuilder();
|
|
||||||
for (LocatedBlock lb: util.getAllBlocks(dfs, path)){
|
|
||||||
sb.append(lb.getBlock().getLocalBlock().getBlockName()+" ");
|
|
||||||
}
|
|
||||||
String[] bIds = sb.toString().split(" ");
|
|
||||||
|
|
||||||
//make sure datanode that has replica is fine before maintenance
|
|
||||||
String outStr = runFsck(conf, 0, true, testFile);
|
|
||||||
System.out.println(outStr);
|
|
||||||
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
|
|
||||||
|
|
||||||
FSNamesystem fsn = cluster.getNameNode().getNamesystem();
|
|
||||||
BlockManager bm = fsn.getBlockManager();
|
|
||||||
DatanodeManager dnm = bm.getDatanodeManager();
|
|
||||||
DatanodeDescriptor dn = dnm.getDatanode(cluster.getDataNodes().get(0)
|
|
||||||
.getDatanodeId());
|
|
||||||
bm.getDatanodeManager().getDecomManager().startMaintenance(dn,
|
|
||||||
Long.MAX_VALUE);
|
|
||||||
final String dnName = dn.getXferAddr();
|
|
||||||
|
|
||||||
//wait for the node to enter maintenance state
|
|
||||||
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
|
||||||
@Override
|
|
||||||
public Boolean get() {
|
|
||||||
DatanodeInfo datanodeInfo = null;
|
|
||||||
try {
|
|
||||||
for (DatanodeInfo info : dfs.getDataNodeStats()) {
|
|
||||||
if (dnName.equals(info.getXferAddr())) {
|
|
||||||
datanodeInfo = info;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (datanodeInfo != null && datanodeInfo.isEnteringMaintenance()) {
|
|
||||||
// verify fsck returns Healthy status
|
|
||||||
String fsckOut = runFsck(conf, 0, true, testFile);
|
|
||||||
assertTrue(fsckOut.contains(NamenodeFsck.HEALTHY_STATUS));
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.warn("Unexpected exception: " + e);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}, 500, 30000);
|
|
||||||
|
|
||||||
// Start 3rd DataNode and wait for node to reach in maintenance state
|
|
||||||
cluster.startDataNodes(conf, 1, true, null,
|
|
||||||
new String[] {"/rack3"}, new String[] {"host3"}, null, false);
|
|
||||||
|
|
||||||
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
|
||||||
@Override
|
|
||||||
public Boolean get() {
|
|
||||||
DatanodeInfo datanodeInfo = null;
|
|
||||||
try {
|
|
||||||
for (DatanodeInfo info : dfs.getDataNodeStats()) {
|
|
||||||
if (dnName.equals(info.getXferAddr())) {
|
|
||||||
datanodeInfo = info;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (datanodeInfo != null && datanodeInfo.isInMaintenance()) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.warn("Unexpected exception: " + e);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}, 500, 30000);
|
|
||||||
|
|
||||||
// verify fsck returns Healthy status
|
|
||||||
String fsckOut = runFsck(conf, 0, true, testFile);
|
|
||||||
assertTrue(fsckOut.contains(NamenodeFsck.HEALTHY_STATUS));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testECFsck() throws Exception {
|
public void testECFsck() throws Exception {
|
||||||
FileSystem fs = null;
|
FileSystem fs = null;
|
||||||
|
Loading…
Reference in New Issue
Block a user