YARN-1101. Active nodes can be decremented below 0 (Robert Parker via tgraves)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1518384 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
82fc0f1855
commit
56c1b9de0c
@ -117,6 +117,9 @@ Release 2.1.1-beta - UNRELEASED
|
|||||||
YARN-602. Fixed NodeManager to not let users override some mandatory
|
YARN-602. Fixed NodeManager to not let users override some mandatory
|
||||||
environmental variables. (Kenji Kikushima via vinodkv)
|
environmental variables. (Kenji Kikushima via vinodkv)
|
||||||
|
|
||||||
|
YARN-1101. Active nodes can be decremented below 0 (Robert Parker
|
||||||
|
via tgraves)
|
||||||
|
|
||||||
Release 2.1.0-beta - 2013-08-22
|
Release 2.1.0-beta - 2013-08-22
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
@ -1235,6 +1238,9 @@ Release 0.23.10 - UNRELEASED
|
|||||||
|
|
||||||
YARN-337. RM handles killed application tracking URL poorly (jlowe)
|
YARN-337. RM handles killed application tracking URL poorly (jlowe)
|
||||||
|
|
||||||
|
YARN-1101. Active nodes can be decremented below 0 (Robert Parker
|
||||||
|
via tgraves)
|
||||||
|
|
||||||
Release 0.23.9 - 2013-07-08
|
Release 0.23.9 - 2013-07-08
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
@ -393,9 +393,18 @@ private void updateMetricsForRejoinedNode(NodeState previousNodeState) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void updateMetricsForDeactivatedNode(NodeState finalState) {
|
private void updateMetricsForDeactivatedNode(NodeState initialState,
|
||||||
|
NodeState finalState) {
|
||||||
ClusterMetrics metrics = ClusterMetrics.getMetrics();
|
ClusterMetrics metrics = ClusterMetrics.getMetrics();
|
||||||
metrics.decrNumActiveNodes();
|
|
||||||
|
switch (initialState) {
|
||||||
|
case RUNNING:
|
||||||
|
metrics.decrNumActiveNodes();
|
||||||
|
break;
|
||||||
|
case UNHEALTHY:
|
||||||
|
metrics.decrNumUnhealthyNMs();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
switch (finalState) {
|
switch (finalState) {
|
||||||
case DECOMMISSIONED:
|
case DECOMMISSIONED:
|
||||||
@ -505,7 +514,8 @@ public void transition(RMNodeImpl rmNode, RMNodeEvent event) {
|
|||||||
// If the current state is NodeState.UNHEALTHY
|
// If the current state is NodeState.UNHEALTHY
|
||||||
// Then node is already been removed from the
|
// Then node is already been removed from the
|
||||||
// Scheduler
|
// Scheduler
|
||||||
if (!rmNode.getState().equals(NodeState.UNHEALTHY)) {
|
NodeState initialState = rmNode.getState();
|
||||||
|
if (!initialState.equals(NodeState.UNHEALTHY)) {
|
||||||
rmNode.context.getDispatcher().getEventHandler()
|
rmNode.context.getDispatcher().getEventHandler()
|
||||||
.handle(new NodeRemovedSchedulerEvent(rmNode));
|
.handle(new NodeRemovedSchedulerEvent(rmNode));
|
||||||
}
|
}
|
||||||
@ -520,7 +530,7 @@ public void transition(RMNodeImpl rmNode, RMNodeEvent event) {
|
|||||||
rmNode.context.getInactiveRMNodes().put(rmNode.nodeId.getHost(), rmNode);
|
rmNode.context.getInactiveRMNodes().put(rmNode.nodeId.getHost(), rmNode);
|
||||||
|
|
||||||
//Update the metrics
|
//Update the metrics
|
||||||
rmNode.updateMetricsForDeactivatedNode(finalState);
|
rmNode.updateMetricsForDeactivatedNode(initialState, finalState);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -550,7 +560,8 @@ public NodeState transition(RMNodeImpl rmNode, RMNodeEvent event) {
|
|||||||
new NodesListManagerEvent(
|
new NodesListManagerEvent(
|
||||||
NodesListManagerEventType.NODE_UNUSABLE, rmNode));
|
NodesListManagerEventType.NODE_UNUSABLE, rmNode));
|
||||||
// Update metrics
|
// Update metrics
|
||||||
rmNode.updateMetricsForDeactivatedNode(NodeState.UNHEALTHY);
|
rmNode.updateMetricsForDeactivatedNode(rmNode.getState(),
|
||||||
|
NodeState.UNHEALTHY);
|
||||||
return NodeState.UNHEALTHY;
|
return NodeState.UNHEALTHY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -267,7 +267,21 @@ public void testRunningExpire() {
|
|||||||
@Test
|
@Test
|
||||||
public void testUnhealthyExpire() {
|
public void testUnhealthyExpire() {
|
||||||
RMNodeImpl node = getUnhealthyNode();
|
RMNodeImpl node = getUnhealthyNode();
|
||||||
|
ClusterMetrics cm = ClusterMetrics.getMetrics();
|
||||||
|
int initialActive = cm.getNumActiveNMs();
|
||||||
|
int initialLost = cm.getNumLostNMs();
|
||||||
|
int initialUnhealthy = cm.getUnhealthyNMs();
|
||||||
|
int initialDecommissioned = cm.getNumDecommisionedNMs();
|
||||||
|
int initialRebooted = cm.getNumRebootedNMs();
|
||||||
node.handle(new RMNodeEvent(node.getNodeID(), RMNodeEventType.EXPIRE));
|
node.handle(new RMNodeEvent(node.getNodeID(), RMNodeEventType.EXPIRE));
|
||||||
|
Assert.assertEquals("Active Nodes", initialActive, cm.getNumActiveNMs());
|
||||||
|
Assert.assertEquals("Lost Nodes", initialLost + 1, cm.getNumLostNMs());
|
||||||
|
Assert.assertEquals("Unhealthy Nodes",
|
||||||
|
initialUnhealthy - 1, cm.getUnhealthyNMs());
|
||||||
|
Assert.assertEquals("Decommissioned Nodes",
|
||||||
|
initialDecommissioned, cm.getNumDecommisionedNMs());
|
||||||
|
Assert.assertEquals("Rebooted Nodes",
|
||||||
|
initialRebooted, cm.getNumRebootedNMs());
|
||||||
Assert.assertEquals(NodeState.LOST, node.getState());
|
Assert.assertEquals(NodeState.LOST, node.getState());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -291,8 +305,22 @@ public void testRunningDecommission() {
|
|||||||
@Test
|
@Test
|
||||||
public void testUnhealthyDecommission() {
|
public void testUnhealthyDecommission() {
|
||||||
RMNodeImpl node = getUnhealthyNode();
|
RMNodeImpl node = getUnhealthyNode();
|
||||||
|
ClusterMetrics cm = ClusterMetrics.getMetrics();
|
||||||
|
int initialActive = cm.getNumActiveNMs();
|
||||||
|
int initialLost = cm.getNumLostNMs();
|
||||||
|
int initialUnhealthy = cm.getUnhealthyNMs();
|
||||||
|
int initialDecommissioned = cm.getNumDecommisionedNMs();
|
||||||
|
int initialRebooted = cm.getNumRebootedNMs();
|
||||||
node.handle(new RMNodeEvent(node.getNodeID(),
|
node.handle(new RMNodeEvent(node.getNodeID(),
|
||||||
RMNodeEventType.DECOMMISSION));
|
RMNodeEventType.DECOMMISSION));
|
||||||
|
Assert.assertEquals("Active Nodes", initialActive, cm.getNumActiveNMs());
|
||||||
|
Assert.assertEquals("Lost Nodes", initialLost, cm.getNumLostNMs());
|
||||||
|
Assert.assertEquals("Unhealthy Nodes",
|
||||||
|
initialUnhealthy - 1, cm.getUnhealthyNMs());
|
||||||
|
Assert.assertEquals("Decommissioned Nodes",
|
||||||
|
initialDecommissioned + 1, cm.getNumDecommisionedNMs());
|
||||||
|
Assert.assertEquals("Rebooted Nodes",
|
||||||
|
initialRebooted, cm.getNumRebootedNMs());
|
||||||
Assert.assertEquals(NodeState.DECOMMISSIONED, node.getState());
|
Assert.assertEquals(NodeState.DECOMMISSIONED, node.getState());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -307,8 +335,22 @@ public void testRunningRebooting() {
|
|||||||
@Test
|
@Test
|
||||||
public void testUnhealthyRebooting() {
|
public void testUnhealthyRebooting() {
|
||||||
RMNodeImpl node = getUnhealthyNode();
|
RMNodeImpl node = getUnhealthyNode();
|
||||||
|
ClusterMetrics cm = ClusterMetrics.getMetrics();
|
||||||
|
int initialActive = cm.getNumActiveNMs();
|
||||||
|
int initialLost = cm.getNumLostNMs();
|
||||||
|
int initialUnhealthy = cm.getUnhealthyNMs();
|
||||||
|
int initialDecommissioned = cm.getNumDecommisionedNMs();
|
||||||
|
int initialRebooted = cm.getNumRebootedNMs();
|
||||||
node.handle(new RMNodeEvent(node.getNodeID(),
|
node.handle(new RMNodeEvent(node.getNodeID(),
|
||||||
RMNodeEventType.REBOOTING));
|
RMNodeEventType.REBOOTING));
|
||||||
|
Assert.assertEquals("Active Nodes", initialActive, cm.getNumActiveNMs());
|
||||||
|
Assert.assertEquals("Lost Nodes", initialLost, cm.getNumLostNMs());
|
||||||
|
Assert.assertEquals("Unhealthy Nodes",
|
||||||
|
initialUnhealthy - 1, cm.getUnhealthyNMs());
|
||||||
|
Assert.assertEquals("Decommissioned Nodes",
|
||||||
|
initialDecommissioned, cm.getNumDecommisionedNMs());
|
||||||
|
Assert.assertEquals("Rebooted Nodes",
|
||||||
|
initialRebooted + 1, cm.getNumRebootedNMs());
|
||||||
Assert.assertEquals(NodeState.REBOOTED, node.getState());
|
Assert.assertEquals(NodeState.REBOOTED, node.getState());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user