HDFS-11412. Maintenance minimum replication config value allowable range should be [0, DefaultReplication]. (Manoj Govindassamy via mingma)

This commit is contained in:
Ming Ma 2017-03-01 20:23:52 -08:00
parent 6f6dfe0202
commit 25c84d279b
3 changed files with 124 additions and 4 deletions

View File

@ -484,12 +484,12 @@ public BlockManager(final Namesystem namesystem, boolean haEnabled,
+ DFSConfigKeys.DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY
+ " = " + minMaintenanceR + " < 0");
}
if (minMaintenanceR > minR) {
if (minMaintenanceR > defaultReplication) {
throw new IOException("Unexpected configuration parameters: "
+ DFSConfigKeys.DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY
+ " = " + minMaintenanceR + " > "
+ DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY
+ " = " + minR);
+ DFSConfigKeys.DFS_REPLICATION_KEY
+ " = " + defaultReplication);
}
this.minReplicationToBeInMaintenance = (short)minMaintenanceR;
@ -825,7 +825,8 @@ private short getMinMaintenanceStorageNum(BlockInfo block) {
if (block.isStriped()) {
return ((BlockInfoStriped) block).getRealDataBlockNum();
} else {
return minReplicationToBeInMaintenance;
return (short) Math.min(minReplicationToBeInMaintenance,
block.getReplication());
}
}

View File

@ -807,6 +807,14 @@ private void initMiniDFSCluster(
int replication = conf.getInt(DFS_REPLICATION_KEY, 3);
conf.setInt(DFS_REPLICATION_KEY, Math.min(replication, numDataNodes));
int maintenanceMinReplication = conf.getInt(
DFSConfigKeys.DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY,
DFSConfigKeys.DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_DEFAULT);
if (maintenanceMinReplication ==
DFSConfigKeys.DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_DEFAULT) {
conf.setInt(DFSConfigKeys.DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY,
Math.min(maintenanceMinReplication, numDataNodes));
}
int safemodeExtension = conf.getInt(
DFS_NAMENODE_SAFEMODE_EXTENSION_TESTING_KEY, 0);
conf.setInt(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, safemodeExtension);

View File

@ -20,6 +20,7 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException;
import java.util.ArrayList;
@ -30,6 +31,7 @@
import java.util.Map;
import com.google.common.collect.Lists;
import org.junit.Assert;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.FileSystem;
@ -66,6 +68,38 @@ void setMinMaintenanceR(int minMaintenanceR) {
minMaintenanceR);
}
/**
* Test valid value range for the config namenode.maintenance.replication.min.
*/
@Test (timeout = 60000)
public void testMaintenanceMinReplConfigRange() {
LOG.info("Setting testMaintenanceMinReplConfigRange");
// Case 1: Maintenance min replication less allowed minimum 0
setMinMaintenanceR(-1);
try {
startCluster(1, 1);
fail("Cluster start should fail when 'dfs.namenode.maintenance" +
".replication.min=-1'");
} catch (IOException e) {
LOG.info("Expected exception: " + e);
}
// Case 2: Maintenance min replication greater
// allowed max of DFSConfigKeys.DFS_REPLICATION_KEY
int defaultRepl = getConf().getInt(
DFSConfigKeys.DFS_REPLICATION_KEY,
DFSConfigKeys.DFS_REPLICATION_DEFAULT);
setMinMaintenanceR(defaultRepl + 1);
try {
startCluster(1, 1);
fail("Cluster start should fail when 'dfs.namenode.maintenance" +
".replication.min > " + defaultRepl + "'");
} catch (IOException e) {
LOG.info("Expected exception: " + e);
}
}
/**
* Verify a node can transition from AdminStates.ENTERING_MAINTENANCE to
* AdminStates.NORMAL.
@ -406,6 +440,83 @@ public void testZeroMinMaintenanceReplicationWithExpiration()
cleanupFile(fileSys, file);
}
/**
* Test file block replication lesser than maintenance minimum.
*/
@Test(timeout = 360000)
public void testFileBlockReplicationAffectingMaintenance()
throws Exception {
int defaultReplication = getConf().getInt(DFSConfigKeys
.DFS_REPLICATION_KEY, DFSConfigKeys.DFS_REPLICATION_DEFAULT);
int defaultMaintenanceMinRepl = getConf().getInt(DFSConfigKeys
.DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY,
DFSConfigKeys.DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_DEFAULT);
// Case 1:
// * Maintenance min larger than default min replication
// * File block replication larger than maintenance min
// * Initial data nodes not sufficient to remove all maintenance nodes
// as file block replication is greater than maintenance min.
// * Data nodes added later for the state transition to progress
int maintenanceMinRepl = defaultMaintenanceMinRepl + 1;
int fileBlockReplication = maintenanceMinRepl + 1;
int numAddedDataNodes = 1;
int numInitialDataNodes = (maintenanceMinRepl * 2 - numAddedDataNodes);
Assert.assertTrue(maintenanceMinRepl <= defaultReplication);
testFileBlockReplicationImpl(maintenanceMinRepl,
numInitialDataNodes, numAddedDataNodes, fileBlockReplication);
// Case 2:
// * Maintenance min larger than default min replication
// * File block replication lesser than maintenance min
// * Initial data nodes after removal of maintenance nodes is still
// sufficient for the file block replication.
// * No new data nodes to be added, still the state transition happens
maintenanceMinRepl = defaultMaintenanceMinRepl + 1;
fileBlockReplication = maintenanceMinRepl - 1;
numAddedDataNodes = 0;
numInitialDataNodes = (maintenanceMinRepl * 2 - numAddedDataNodes);
testFileBlockReplicationImpl(maintenanceMinRepl,
numInitialDataNodes, numAddedDataNodes, fileBlockReplication);
}
private void testFileBlockReplicationImpl(
int maintenanceMinRepl, int numDataNodes, int numNewDataNodes,
int fileBlockRepl)
throws Exception {
setup();
LOG.info("Starting testLargerMinMaintenanceReplication - maintMinRepl: "
+ maintenanceMinRepl + ", numDNs: " + numDataNodes + ", numNewDNs: "
+ numNewDataNodes + ", fileRepl: " + fileBlockRepl);
LOG.info("Setting maintenance minimum replication: " + maintenanceMinRepl);
setMinMaintenanceR(maintenanceMinRepl);
startCluster(1, numDataNodes);
final Path file = new Path("/testLargerMinMaintenanceReplication.dat");
FileSystem fileSys = getCluster().getFileSystem(0);
writeFile(fileSys, file, fileBlockRepl, 1);
final DatanodeInfo[] nodes = getFirstBlockReplicasDatanodeInfos(fileSys,
file);
ArrayList<String> nodeUuids = new ArrayList<>();
for (int i = 0; i < maintenanceMinRepl && i < nodes.length; i++) {
nodeUuids.add(nodes[i].getDatanodeUuid());
}
List<DatanodeInfo> maintenanceDNs = takeNodeOutofService(0, nodeUuids,
Long.MAX_VALUE, null, null, AdminStates.ENTERING_MAINTENANCE);
for (int i = 0; i < numNewDataNodes; i++) {
getCluster().startDataNodes(getConf(), 1, true, null, null);
}
getCluster().waitActive();
refreshNodes(0);
waitNodeState(maintenanceDNs, AdminStates.IN_MAINTENANCE);
cleanupFile(fileSys, file);
teardown();
}
/**
* Transition from IN_MAINTENANCE to DECOMMISSIONED.
*/