HDFS-9500. Fix software version counts for DataNodes during rolling upgrade. Contributed by Erik Krogen.
This commit is contained in:
parent
022bf783aa
commit
f3ac1f41b8
@ -759,17 +759,25 @@ private void decrementVersionCount(String version) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Will return true for all Datanodes which have a non-null software
|
||||||
|
* version and are considered alive (by {@link DatanodeDescriptor#isAlive()}),
|
||||||
|
* indicating the node has not yet been removed. Use {@code isAlive}
|
||||||
|
* rather than {@link DatanodeManager#isDatanodeDead(DatanodeDescriptor)}
|
||||||
|
* to ensure that the version is decremented even if the datanode
|
||||||
|
* hasn't issued a heartbeat recently.
|
||||||
|
*
|
||||||
|
* @param node The datanode in question
|
||||||
|
* @return True iff its version should be counted (and later decremented)
|
||||||
|
*/
|
||||||
private boolean shouldCountVersion(DatanodeDescriptor node) {
|
private boolean shouldCountVersion(DatanodeDescriptor node) {
|
||||||
return node.getSoftwareVersion() != null && node.isAlive() &&
|
return node.getSoftwareVersion() != null && node.isAlive();
|
||||||
!isDatanodeDead(node);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void countSoftwareVersions() {
|
private void countSoftwareVersions() {
|
||||||
synchronized(this) {
|
synchronized(this) {
|
||||||
datanodesSoftwareVersions.clear();
|
datanodesSoftwareVersions.clear();
|
||||||
for(DatanodeDescriptor dn: datanodeMap.values()) {
|
for(DatanodeDescriptor dn: datanodeMap.values()) {
|
||||||
// Check isAlive too because right after removeDatanode(),
|
|
||||||
// isDatanodeDead() is still true
|
|
||||||
if (shouldCountVersion(dn)) {
|
if (shouldCountVersion(dn)) {
|
||||||
Integer num = datanodesSoftwareVersions.get(dn.getSoftwareVersion());
|
Integer num = datanodesSoftwareVersions.get(dn.getSoftwareVersion());
|
||||||
num = num == null ? 1 : num+1;
|
num = num == null ? 1 : num+1;
|
||||||
|
@ -85,6 +85,43 @@ private static InetSocketAddress entry(String host) {
|
|||||||
return HostFileManager.parseEntry("dummy", "dummy", host);
|
return HostFileManager.parseEntry("dummy", "dummy", host);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This test checks that if a node is re-registered with a new software
|
||||||
|
* version after the heartbeat expiry interval but before the HeartbeatManager
|
||||||
|
* has a chance to detect this and remove it, the node's version will still
|
||||||
|
* be correctly decremented.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testNumVersionsCorrectAfterReregister()
|
||||||
|
throws IOException, InterruptedException {
|
||||||
|
//Create the DatanodeManager which will be tested
|
||||||
|
FSNamesystem fsn = Mockito.mock(FSNamesystem.class);
|
||||||
|
Mockito.when(fsn.hasWriteLock()).thenReturn(true);
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 0);
|
||||||
|
conf.setLong(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 10);
|
||||||
|
DatanodeManager dm = mockDatanodeManager(fsn, conf);
|
||||||
|
|
||||||
|
String storageID = "someStorageID1";
|
||||||
|
String ip = "someIP" + storageID;
|
||||||
|
|
||||||
|
// Register then reregister the same node but with a different version
|
||||||
|
for (int i = 0; i <= 1; i++) {
|
||||||
|
dm.registerDatanode(new DatanodeRegistration(
|
||||||
|
new DatanodeID(ip, "", storageID, 9000, 0, 0, 0),
|
||||||
|
null, null, "version" + i));
|
||||||
|
if (i == 0) {
|
||||||
|
Thread.sleep(25);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//Verify DatanodeManager has the correct count
|
||||||
|
Map<String, Integer> mapToCheck = dm.getDatanodesSoftwareVersions();
|
||||||
|
assertNull("should be no more version0 nodes", mapToCheck.get("version0"));
|
||||||
|
assertEquals("should be one version1 node",
|
||||||
|
mapToCheck.get("version1").intValue(), 1);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This test sends a random sequence of node registrations and node removals
|
* This test sends a random sequence of node registrations and node removals
|
||||||
* to the DatanodeManager (of nodes with different IDs and versions), and
|
* to the DatanodeManager (of nodes with different IDs and versions), and
|
||||||
|
Loading…
Reference in New Issue
Block a user