YARN-876. Node resource is added twice when node comes back from unhealthy to healthy. (Peng Zhang via Sandy Ryza)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528660 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sanford Ryza 2013-10-02 22:33:47 +00:00
parent 90a5dc34f7
commit 86cc60c43b
3 changed files with 16 additions and 2 deletions

View File

@@ -129,6 +129,9 @@ Release 2.1.2 - UNRELEASED
YARN-1141. Updating resource requests should be decoupled with updating
blacklist (Zhijie Shen via bikas)
YARN-876. Node resource is added twice when node comes back from unhealthy
to healthy. (Peng Zhang via Sandy Ryza)
Release 2.1.1-beta - 2013-09-23
INCOMPATIBLE CHANGES

View File

@@ -460,8 +460,11 @@ public void transition(RMNodeImpl rmNode, RMNodeEvent event) {
&& rmNode.getHttpPort() == newNode.getHttpPort()) { && rmNode.getHttpPort() == newNode.getHttpPort()) {
// Reset heartbeat ID since node just restarted. // Reset heartbeat ID since node just restarted.
rmNode.getLastNodeHeartBeatResponse().setResponseId(0); rmNode.getLastNodeHeartBeatResponse().setResponseId(0);
if (rmNode.getState() != NodeState.UNHEALTHY) {
// Only add new node if old state is not UNHEALTHY
rmNode.context.getDispatcher().getEventHandler().handle( rmNode.context.getDispatcher().getEventHandler().handle(
new NodeAddedSchedulerEvent(rmNode)); new NodeAddedSchedulerEvent(rmNode));
}
} else { } else {
// Reconnected node differs, so replace old node and start new node // Reconnected node differs, so replace old node and start new node
switch (rmNode.getState()) { switch (rmNode.getState()) {

View File

@@ -507,6 +507,14 @@ protected Dispatcher createDispatcher() {
Assert.assertEquals(expectedNMs, ClusterMetrics.getMetrics().getNumActiveNMs()); Assert.assertEquals(expectedNMs, ClusterMetrics.getMetrics().getNumActiveNMs());
checkUnealthyNMCount(rm, nm2, true, 1); checkUnealthyNMCount(rm, nm2, true, 1);
// unhealthy node changed back to healthy
nm2 = rm.registerNode("host2:5678", 5120);
dispatcher.await();
response = nm2.nodeHeartbeat(true);
response = nm2.nodeHeartbeat(true);
dispatcher.await();
Assert.assertEquals(5120 + 5120, metrics.getAvailableMB());
// reconnect of node with changed capability // reconnect of node with changed capability
nm1 = rm.registerNode("host2:5678", 10240); nm1 = rm.registerNode("host2:5678", 10240);
dispatcher.await(); dispatcher.await();