YARN-3226. UI changes for decommissioning node. Contributed by Sunil G.
This commit is contained in:
parent
5cb1e0118b
commit
1de56b0448
@ -280,6 +280,9 @@ Release 2.8.0 - UNRELEASED
|
|||||||
YARN-3623. Add a new config to indicate the Timeline Service version.
|
YARN-3623. Add a new config to indicate the Timeline Service version.
|
||||||
(Xuan Gong via junping_du)
|
(Xuan Gong via junping_du)
|
||||||
|
|
||||||
|
YARN-3226. UI changes for decommissioning node. (Sunil G via
|
||||||
|
junping_du)
|
||||||
|
|
||||||
IMPROVEMENTS
|
IMPROVEMENTS
|
||||||
|
|
||||||
YARN-644. Basic null check is not performed on passed in arguments before
|
YARN-644. Basic null check is not performed on passed in arguments before
|
||||||
|
@ -40,6 +40,7 @@ public class ClusterMetrics {
|
|||||||
private static AtomicBoolean isInitialized = new AtomicBoolean(false);
|
private static AtomicBoolean isInitialized = new AtomicBoolean(false);
|
||||||
|
|
||||||
@Metric("# of active NMs") MutableGaugeInt numActiveNMs;
|
@Metric("# of active NMs") MutableGaugeInt numActiveNMs;
|
||||||
|
@Metric("# of decommissioning NMs") MutableGaugeInt numDecommissioningNMs;
|
||||||
@Metric("# of decommissioned NMs") MutableGaugeInt numDecommissionedNMs;
|
@Metric("# of decommissioned NMs") MutableGaugeInt numDecommissionedNMs;
|
||||||
@Metric("# of lost NMs") MutableGaugeInt numLostNMs;
|
@Metric("# of lost NMs") MutableGaugeInt numLostNMs;
|
||||||
@Metric("# of unhealthy NMs") MutableGaugeInt numUnhealthyNMs;
|
@Metric("# of unhealthy NMs") MutableGaugeInt numUnhealthyNMs;
|
||||||
@ -87,6 +88,23 @@ public int getNumActiveNMs() {
|
|||||||
return numActiveNMs.value();
|
return numActiveNMs.value();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Decommissioning NMs
|
||||||
|
public int getNumDecommissioningNMs() {
|
||||||
|
return numDecommissioningNMs.value();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void incrDecommissioningNMs() {
|
||||||
|
numDecommissioningNMs.incr();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDecommissioningNMs(int num) {
|
||||||
|
numDecommissioningNMs.set(num);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void decrDecommissioningNMs() {
|
||||||
|
numDecommissioningNMs.decr();
|
||||||
|
}
|
||||||
|
|
||||||
//Decommissioned NMs
|
//Decommissioned NMs
|
||||||
public int getNumDecommisionedNMs() {
|
public int getNumDecommisionedNMs() {
|
||||||
return numDecommissionedNMs.value();
|
return numDecommissionedNMs.value();
|
||||||
|
@ -647,13 +647,34 @@ private void updateMetricsForRejoinedNode(NodeState previousNodeState) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Treats nodes in decommissioning as active nodes
|
// Update metrics when moving to Decommissioning state
|
||||||
// TODO we may want to differentiate active nodes and decommissioning node in
|
private void updateMetricsForGracefulDecommission(NodeState initialState,
|
||||||
// metrics later.
|
NodeState finalState) {
|
||||||
private void updateMetricsForGracefulDecommissionOnUnhealthyNode() {
|
|
||||||
ClusterMetrics metrics = ClusterMetrics.getMetrics();
|
ClusterMetrics metrics = ClusterMetrics.getMetrics();
|
||||||
metrics.incrNumActiveNodes();
|
switch (initialState) {
|
||||||
metrics.decrNumUnhealthyNMs();
|
case UNHEALTHY :
|
||||||
|
metrics.decrNumUnhealthyNMs();
|
||||||
|
break;
|
||||||
|
case RUNNING :
|
||||||
|
metrics.decrNumActiveNodes();
|
||||||
|
break;
|
||||||
|
case DECOMMISSIONING :
|
||||||
|
metrics.decrDecommissioningNMs();
|
||||||
|
break;
|
||||||
|
default :
|
||||||
|
LOG.warn("Unexpcted initial state");
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (finalState) {
|
||||||
|
case DECOMMISSIONING :
|
||||||
|
metrics.incrDecommissioningNMs();
|
||||||
|
break;
|
||||||
|
case RUNNING :
|
||||||
|
metrics.incrNumActiveNodes();
|
||||||
|
break;
|
||||||
|
default :
|
||||||
|
LOG.warn("Unexpected final state");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void updateMetricsForDeactivatedNode(NodeState initialState,
|
private void updateMetricsForDeactivatedNode(NodeState initialState,
|
||||||
@ -665,18 +686,18 @@ private void updateMetricsForDeactivatedNode(NodeState initialState,
|
|||||||
metrics.decrNumActiveNodes();
|
metrics.decrNumActiveNodes();
|
||||||
break;
|
break;
|
||||||
case DECOMMISSIONING:
|
case DECOMMISSIONING:
|
||||||
metrics.decrNumActiveNodes();
|
metrics.decrDecommissioningNMs();
|
||||||
break;
|
break;
|
||||||
case UNHEALTHY:
|
case UNHEALTHY:
|
||||||
metrics.decrNumUnhealthyNMs();
|
metrics.decrNumUnhealthyNMs();
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
LOG.debug("Unexpected inital state");
|
LOG.warn("Unexpected initial state");
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (finalState) {
|
switch (finalState) {
|
||||||
case DECOMMISSIONED:
|
case DECOMMISSIONED:
|
||||||
metrics.incrDecommisionedNMs();
|
metrics.incrDecommisionedNMs();
|
||||||
break;
|
break;
|
||||||
case LOST:
|
case LOST:
|
||||||
metrics.incrNumLostNMs();
|
metrics.incrNumLostNMs();
|
||||||
@ -691,7 +712,7 @@ private void updateMetricsForDeactivatedNode(NodeState initialState,
|
|||||||
metrics.incrNumShutdownNMs();
|
metrics.incrNumShutdownNMs();
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
LOG.debug("Unexpected final state");
|
LOG.warn("Unexpected final state");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1014,9 +1035,8 @@ public DecommissioningNodeTransition(NodeState initState,
|
|||||||
@Override
|
@Override
|
||||||
public void transition(RMNodeImpl rmNode, RMNodeEvent event) {
|
public void transition(RMNodeImpl rmNode, RMNodeEvent event) {
|
||||||
LOG.info("Put Node " + rmNode.nodeId + " in DECOMMISSIONING.");
|
LOG.info("Put Node " + rmNode.nodeId + " in DECOMMISSIONING.");
|
||||||
if (initState.equals(NodeState.UNHEALTHY)) {
|
// Update NM metrics during graceful decommissioning.
|
||||||
rmNode.updateMetricsForGracefulDecommissionOnUnhealthyNode();
|
rmNode.updateMetricsForGracefulDecommission(initState, finalState);
|
||||||
}
|
|
||||||
// TODO (in YARN-3223) Keep NM's available resource to be 0
|
// TODO (in YARN-3223) Keep NM's available resource to be 0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1033,6 +1053,8 @@ public RecommissionNodeTransition(NodeState finalState) {
|
|||||||
public void transition(RMNodeImpl rmNode, RMNodeEvent event) {
|
public void transition(RMNodeImpl rmNode, RMNodeEvent event) {
|
||||||
LOG.info("Node " + rmNode.nodeId + " in DECOMMISSIONING is " +
|
LOG.info("Node " + rmNode.nodeId + " in DECOMMISSIONING is " +
|
||||||
"recommissioned back to RUNNING.");
|
"recommissioned back to RUNNING.");
|
||||||
|
rmNode
|
||||||
|
.updateMetricsForGracefulDecommission(rmNode.getState(), finalState);
|
||||||
// TODO handle NM resource resume in YARN-3223.
|
// TODO handle NM resource resume in YARN-3223.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -53,8 +53,7 @@ protected void render(Block html) {
|
|||||||
//CSS in the correct spot
|
//CSS in the correct spot
|
||||||
html.style(".metrics {margin-bottom:5px}");
|
html.style(".metrics {margin-bottom:5px}");
|
||||||
|
|
||||||
ClusterMetricsInfo clusterMetrics =
|
ClusterMetricsInfo clusterMetrics = new ClusterMetricsInfo(this.rm);
|
||||||
new ClusterMetricsInfo(this.rm);
|
|
||||||
|
|
||||||
DIV<Hamlet> div = html.div().$class("metrics");
|
DIV<Hamlet> div = html.div().$class("metrics");
|
||||||
|
|
||||||
@ -73,12 +72,6 @@ protected void render(Block html) {
|
|||||||
th().$class("ui-state-default")._("VCores Used")._().
|
th().$class("ui-state-default")._("VCores Used")._().
|
||||||
th().$class("ui-state-default")._("VCores Total")._().
|
th().$class("ui-state-default")._("VCores Total")._().
|
||||||
th().$class("ui-state-default")._("VCores Reserved")._().
|
th().$class("ui-state-default")._("VCores Reserved")._().
|
||||||
th().$class("ui-state-default")._("Active Nodes")._().
|
|
||||||
th().$class("ui-state-default")._("Decommissioned Nodes")._().
|
|
||||||
th().$class("ui-state-default")._("Lost Nodes")._().
|
|
||||||
th().$class("ui-state-default")._("Unhealthy Nodes")._().
|
|
||||||
th().$class("ui-state-default")._("Rebooted Nodes")._().
|
|
||||||
th().$class("ui-state-default")._("Shutdown Nodes")._().
|
|
||||||
_().
|
_().
|
||||||
_().
|
_().
|
||||||
tbody().$class("ui-widget-content").
|
tbody().$class("ui-widget-content").
|
||||||
@ -99,7 +92,26 @@ protected void render(Block html) {
|
|||||||
td(String.valueOf(clusterMetrics.getAllocatedVirtualCores())).
|
td(String.valueOf(clusterMetrics.getAllocatedVirtualCores())).
|
||||||
td(String.valueOf(clusterMetrics.getTotalVirtualCores())).
|
td(String.valueOf(clusterMetrics.getTotalVirtualCores())).
|
||||||
td(String.valueOf(clusterMetrics.getReservedVirtualCores())).
|
td(String.valueOf(clusterMetrics.getReservedVirtualCores())).
|
||||||
|
_().
|
||||||
|
_()._();
|
||||||
|
|
||||||
|
div.h3("Cluster Nodes Metrics").
|
||||||
|
table("#nodemetricsoverview").
|
||||||
|
thead().$class("ui-widget-header").
|
||||||
|
tr().
|
||||||
|
th().$class("ui-state-default")._("Active Nodes")._().
|
||||||
|
th().$class("ui-state-default")._("Decommissioning Nodes")._().
|
||||||
|
th().$class("ui-state-default")._("Decommissioned Nodes")._().
|
||||||
|
th().$class("ui-state-default")._("Lost Nodes")._().
|
||||||
|
th().$class("ui-state-default")._("Unhealthy Nodes")._().
|
||||||
|
th().$class("ui-state-default")._("Rebooted Nodes")._().
|
||||||
|
th().$class("ui-state-default")._("Shutdown Nodes")._().
|
||||||
|
_().
|
||||||
|
_().
|
||||||
|
tbody().$class("ui-widget-content").
|
||||||
|
tr().
|
||||||
td().a(url("nodes"),String.valueOf(clusterMetrics.getActiveNodes()))._().
|
td().a(url("nodes"),String.valueOf(clusterMetrics.getActiveNodes()))._().
|
||||||
|
td().a(url("nodes/decommissioning"), String.valueOf(clusterMetrics.getDecommissioningNodes()))._().
|
||||||
td().a(url("nodes/decommissioned"),String.valueOf(clusterMetrics.getDecommissionedNodes()))._().
|
td().a(url("nodes/decommissioned"),String.valueOf(clusterMetrics.getDecommissionedNodes()))._().
|
||||||
td().a(url("nodes/lost"),String.valueOf(clusterMetrics.getLostNodes()))._().
|
td().a(url("nodes/lost"),String.valueOf(clusterMetrics.getLostNodes()))._().
|
||||||
td().a(url("nodes/unhealthy"),String.valueOf(clusterMetrics.getUnhealthyNodes()))._().
|
td().a(url("nodes/unhealthy"),String.valueOf(clusterMetrics.getUnhealthyNodes()))._().
|
||||||
|
@ -94,6 +94,9 @@ protected void render(Block html) {
|
|||||||
rmNodes = this.rm.getRMContext().getInactiveRMNodes().values();
|
rmNodes = this.rm.getRMContext().getInactiveRMNodes().values();
|
||||||
isInactive = true;
|
isInactive = true;
|
||||||
break;
|
break;
|
||||||
|
case DECOMMISSIONING:
|
||||||
|
// Do nothing
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
LOG.debug("Unexpected state filter for inactive RM node");
|
LOG.debug("Unexpected state filter for inactive RM node");
|
||||||
}
|
}
|
||||||
|
@ -54,6 +54,7 @@ public class ClusterMetricsInfo {
|
|||||||
protected int totalNodes;
|
protected int totalNodes;
|
||||||
protected int lostNodes;
|
protected int lostNodes;
|
||||||
protected int unhealthyNodes;
|
protected int unhealthyNodes;
|
||||||
|
protected int decommissioningNodes;
|
||||||
protected int decommissionedNodes;
|
protected int decommissionedNodes;
|
||||||
protected int rebootedNodes;
|
protected int rebootedNodes;
|
||||||
protected int activeNodes;
|
protected int activeNodes;
|
||||||
@ -91,6 +92,7 @@ public ClusterMetricsInfo(final ResourceManager rm) {
|
|||||||
this.activeNodes = clusterMetrics.getNumActiveNMs();
|
this.activeNodes = clusterMetrics.getNumActiveNMs();
|
||||||
this.lostNodes = clusterMetrics.getNumLostNMs();
|
this.lostNodes = clusterMetrics.getNumLostNMs();
|
||||||
this.unhealthyNodes = clusterMetrics.getUnhealthyNMs();
|
this.unhealthyNodes = clusterMetrics.getUnhealthyNMs();
|
||||||
|
this.decommissioningNodes = clusterMetrics.getNumDecommissioningNMs();
|
||||||
this.decommissionedNodes = clusterMetrics.getNumDecommisionedNMs();
|
this.decommissionedNodes = clusterMetrics.getNumDecommisionedNMs();
|
||||||
this.rebootedNodes = clusterMetrics.getNumRebootedNMs();
|
this.rebootedNodes = clusterMetrics.getNumRebootedNMs();
|
||||||
this.shutdownNodes = clusterMetrics.getNumShutdownNMs();
|
this.shutdownNodes = clusterMetrics.getNumShutdownNMs();
|
||||||
@ -186,6 +188,10 @@ public int getUnhealthyNodes() {
|
|||||||
return this.unhealthyNodes;
|
return this.unhealthyNodes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int getDecommissioningNodes() {
|
||||||
|
return this.decommissioningNodes;
|
||||||
|
}
|
||||||
|
|
||||||
public int getDecommissionedNodes() {
|
public int getDecommissionedNodes() {
|
||||||
return this.decommissionedNodes;
|
return this.decommissionedNodes;
|
||||||
}
|
}
|
||||||
|
@ -236,29 +236,49 @@ public void testExpiredContainer() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testStatusUpdateOnDecommissioningNode(){
|
public void testStatusUpdateOnDecommissioningNode() {
|
||||||
RMNodeImpl node = getDecommissioningNode();
|
RMNodeImpl node = getDecommissioningNode();
|
||||||
|
ClusterMetrics cm = ClusterMetrics.getMetrics();
|
||||||
|
int initialActive = cm.getNumActiveNMs();
|
||||||
|
int initialDecommissioning = cm.getNumDecommissioningNMs();
|
||||||
|
int initialDecommissioned = cm.getNumDecommisionedNMs();
|
||||||
Assert.assertEquals(NodeState.DECOMMISSIONING, node.getState());
|
Assert.assertEquals(NodeState.DECOMMISSIONING, node.getState());
|
||||||
// Verify node in DECOMMISSIONING won't be changed by status update
|
// Verify node in DECOMMISSIONING won't be changed by status update
|
||||||
// with running apps
|
// with running apps
|
||||||
RMNodeStatusEvent statusEvent = getMockRMNodeStatusEventWithRunningApps();
|
RMNodeStatusEvent statusEvent = getMockRMNodeStatusEventWithRunningApps();
|
||||||
node.handle(statusEvent);
|
node.handle(statusEvent);
|
||||||
Assert.assertEquals(NodeState.DECOMMISSIONING, node.getState());
|
Assert.assertEquals(NodeState.DECOMMISSIONING, node.getState());
|
||||||
|
Assert.assertEquals("Active Nodes", initialActive, cm.getNumActiveNMs());
|
||||||
|
Assert.assertEquals("Decommissioning Nodes", initialDecommissioning,
|
||||||
|
cm.getNumDecommissioningNMs());
|
||||||
|
Assert.assertEquals("Decommissioned Nodes", initialDecommissioned,
|
||||||
|
cm.getNumDecommisionedNMs());
|
||||||
|
|
||||||
// Verify node in DECOMMISSIONING will be changed by status update
|
// Verify node in DECOMMISSIONING will be changed by status update
|
||||||
// without running apps
|
// without running apps
|
||||||
statusEvent = getMockRMNodeStatusEventWithoutRunningApps();
|
statusEvent = getMockRMNodeStatusEventWithoutRunningApps();
|
||||||
node.handle(statusEvent);
|
node.handle(statusEvent);
|
||||||
Assert.assertEquals(NodeState.DECOMMISSIONED, node.getState());
|
Assert.assertEquals(NodeState.DECOMMISSIONED, node.getState());
|
||||||
|
Assert.assertEquals("Active Nodes", initialActive, cm.getNumActiveNMs());
|
||||||
|
Assert.assertEquals("Decommissioning Nodes", initialDecommissioning - 1,
|
||||||
|
cm.getNumDecommissioningNMs());
|
||||||
|
Assert.assertEquals("Decommissioned Nodes", initialDecommissioned + 1,
|
||||||
|
cm.getNumDecommisionedNMs());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testRecommissionNode(){
|
public void testRecommissionNode() {
|
||||||
RMNodeImpl node = getDecommissioningNode();
|
RMNodeImpl node = getDecommissioningNode();
|
||||||
Assert.assertEquals(NodeState.DECOMMISSIONING, node.getState());
|
Assert.assertEquals(NodeState.DECOMMISSIONING, node.getState());
|
||||||
node.handle(new RMNodeEvent(node.getNodeID(),
|
ClusterMetrics cm = ClusterMetrics.getMetrics();
|
||||||
RMNodeEventType.RECOMMISSION));
|
int initialActive = cm.getNumActiveNMs();
|
||||||
|
int initialDecommissioning = cm.getNumDecommissioningNMs();
|
||||||
|
node.handle(new RMNodeEvent(node.getNodeID(), RMNodeEventType.RECOMMISSION));
|
||||||
Assert.assertEquals(NodeState.RUNNING, node.getState());
|
Assert.assertEquals(NodeState.RUNNING, node.getState());
|
||||||
|
Assert
|
||||||
|
.assertEquals("Active Nodes", initialActive + 1, cm.getNumActiveNMs());
|
||||||
|
Assert.assertEquals("Decommissioning Nodes", initialDecommissioning - 1,
|
||||||
|
cm.getNumDecommissioningNMs());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test (timeout = 5000)
|
@Test (timeout = 5000)
|
||||||
@ -481,16 +501,18 @@ public void testDecommissionOnDecommissioningNode() {
|
|||||||
int initialUnhealthy = cm.getUnhealthyNMs();
|
int initialUnhealthy = cm.getUnhealthyNMs();
|
||||||
int initialDecommissioned = cm.getNumDecommisionedNMs();
|
int initialDecommissioned = cm.getNumDecommisionedNMs();
|
||||||
int initialRebooted = cm.getNumRebootedNMs();
|
int initialRebooted = cm.getNumRebootedNMs();
|
||||||
node.handle(new RMNodeEvent(node.getNodeID(),
|
int initialDecommissioning = cm.getNumDecommissioningNMs();
|
||||||
RMNodeEventType.DECOMMISSION));
|
node.handle(new RMNodeEvent(node.getNodeID(), RMNodeEventType.DECOMMISSION));
|
||||||
Assert.assertEquals("Active Nodes", initialActive - 1, cm.getNumActiveNMs());
|
Assert.assertEquals("Active Nodes", initialActive, cm.getNumActiveNMs());
|
||||||
Assert.assertEquals("Lost Nodes", initialLost, cm.getNumLostNMs());
|
Assert.assertEquals("Lost Nodes", initialLost, cm.getNumLostNMs());
|
||||||
Assert.assertEquals("Unhealthy Nodes",
|
Assert.assertEquals("Unhealthy Nodes", initialUnhealthy,
|
||||||
initialUnhealthy, cm.getUnhealthyNMs());
|
cm.getUnhealthyNMs());
|
||||||
Assert.assertEquals("Decommissioned Nodes",
|
Assert.assertEquals("Decommissioning Nodes", initialDecommissioning - 1,
|
||||||
initialDecommissioned + 1, cm.getNumDecommisionedNMs());
|
cm.getNumDecommissioningNMs());
|
||||||
Assert.assertEquals("Rebooted Nodes",
|
Assert.assertEquals("Decommissioned Nodes", initialDecommissioned + 1,
|
||||||
initialRebooted, cm.getNumRebootedNMs());
|
cm.getNumDecommisionedNMs());
|
||||||
|
Assert.assertEquals("Rebooted Nodes", initialRebooted,
|
||||||
|
cm.getNumRebootedNMs());
|
||||||
Assert.assertEquals(NodeState.DECOMMISSIONED, node.getState());
|
Assert.assertEquals(NodeState.DECOMMISSIONED, node.getState());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -525,16 +547,19 @@ public void testUnhealthyDecommissioning() {
|
|||||||
int initialLost = cm.getNumLostNMs();
|
int initialLost = cm.getNumLostNMs();
|
||||||
int initialUnhealthy = cm.getUnhealthyNMs();
|
int initialUnhealthy = cm.getUnhealthyNMs();
|
||||||
int initialDecommissioned = cm.getNumDecommisionedNMs();
|
int initialDecommissioned = cm.getNumDecommisionedNMs();
|
||||||
|
int initialDecommissioning = cm.getNumDecommissioningNMs();
|
||||||
int initialRebooted = cm.getNumRebootedNMs();
|
int initialRebooted = cm.getNumRebootedNMs();
|
||||||
node.handle(new RMNodeEvent(node.getNodeID(),
|
node.handle(new RMNodeEvent(node.getNodeID(),
|
||||||
RMNodeEventType.GRACEFUL_DECOMMISSION));
|
RMNodeEventType.GRACEFUL_DECOMMISSION));
|
||||||
Assert.assertEquals("Active Nodes", initialActive + 1,
|
Assert.assertEquals("Active Nodes", initialActive,
|
||||||
cm.getNumActiveNMs());
|
cm.getNumActiveNMs());
|
||||||
Assert.assertEquals("Lost Nodes", initialLost, cm.getNumLostNMs());
|
Assert.assertEquals("Lost Nodes", initialLost, cm.getNumLostNMs());
|
||||||
Assert.assertEquals("Unhealthy Nodes",
|
Assert.assertEquals("Unhealthy Nodes",
|
||||||
initialUnhealthy - 1, cm.getUnhealthyNMs());
|
initialUnhealthy - 1, cm.getUnhealthyNMs());
|
||||||
Assert.assertEquals("Decommissioned Nodes", initialDecommissioned,
|
Assert.assertEquals("Decommissioned Nodes", initialDecommissioned,
|
||||||
cm.getNumDecommisionedNMs());
|
cm.getNumDecommisionedNMs());
|
||||||
|
Assert.assertEquals("Decommissioning Nodes", initialDecommissioning + 1,
|
||||||
|
cm.getNumDecommissioningNMs());
|
||||||
Assert.assertEquals("Rebooted Nodes",
|
Assert.assertEquals("Rebooted Nodes",
|
||||||
initialRebooted, cm.getNumRebootedNMs());
|
initialRebooted, cm.getNumRebootedNMs());
|
||||||
Assert.assertEquals(NodeState.DECOMMISSIONING, node.getState());
|
Assert.assertEquals(NodeState.DECOMMISSIONING, node.getState());
|
||||||
@ -681,9 +706,16 @@ private RMNodeImpl getRunningNode(String nmVersion, int port) {
|
|||||||
|
|
||||||
private RMNodeImpl getDecommissioningNode() {
|
private RMNodeImpl getDecommissioningNode() {
|
||||||
RMNodeImpl node = getRunningNode();
|
RMNodeImpl node = getRunningNode();
|
||||||
|
ClusterMetrics cm = ClusterMetrics.getMetrics();
|
||||||
|
int initialActive = cm.getNumActiveNMs();
|
||||||
|
int initialDecommissioning = cm.getNumDecommissioningNMs();
|
||||||
node.handle(new RMNodeEvent(node.getNodeID(),
|
node.handle(new RMNodeEvent(node.getNodeID(),
|
||||||
RMNodeEventType.GRACEFUL_DECOMMISSION));
|
RMNodeEventType.GRACEFUL_DECOMMISSION));
|
||||||
Assert.assertEquals(NodeState.DECOMMISSIONING, node.getState());
|
Assert.assertEquals(NodeState.DECOMMISSIONING, node.getState());
|
||||||
|
Assert
|
||||||
|
.assertEquals("Active Nodes", initialActive - 1, cm.getNumActiveNMs());
|
||||||
|
Assert.assertEquals("Decommissioning Nodes", initialDecommissioning + 1,
|
||||||
|
cm.getNumDecommissioningNMs());
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -774,16 +806,30 @@ public void testReconnect() {
|
|||||||
@Test
|
@Test
|
||||||
public void testReconnectOnDecommissioningNode() {
|
public void testReconnectOnDecommissioningNode() {
|
||||||
RMNodeImpl node = getDecommissioningNode();
|
RMNodeImpl node = getDecommissioningNode();
|
||||||
|
ClusterMetrics cm = ClusterMetrics.getMetrics();
|
||||||
|
int initialActive = cm.getNumActiveNMs();
|
||||||
|
int initialDecommissioning = cm.getNumDecommissioningNMs();
|
||||||
|
int initialDecommissioned = cm.getNumDecommisionedNMs();
|
||||||
|
|
||||||
// Reconnect event with running app
|
// Reconnect event with running app
|
||||||
node.handle(new RMNodeReconnectEvent(node.getNodeID(), node,
|
node.handle(new RMNodeReconnectEvent(node.getNodeID(), node,
|
||||||
getAppIdList(), null));
|
getAppIdList(), null));
|
||||||
// still decommissioning
|
// still decommissioning
|
||||||
Assert.assertEquals(NodeState.DECOMMISSIONING, node.getState());
|
Assert.assertEquals(NodeState.DECOMMISSIONING, node.getState());
|
||||||
|
Assert.assertEquals("Active Nodes", initialActive, cm.getNumActiveNMs());
|
||||||
|
Assert.assertEquals("Decommissioning Nodes", initialDecommissioning,
|
||||||
|
cm.getNumDecommissioningNMs());
|
||||||
|
Assert.assertEquals("Decommissioned Nodes", initialDecommissioned,
|
||||||
|
cm.getNumDecommisionedNMs());
|
||||||
|
|
||||||
// Reconnect event without any running app
|
// Reconnect event without any running app
|
||||||
node.handle(new RMNodeReconnectEvent(node.getNodeID(), node, null, null));
|
node.handle(new RMNodeReconnectEvent(node.getNodeID(), node, null, null));
|
||||||
Assert.assertEquals(NodeState.DECOMMISSIONED, node.getState());
|
Assert.assertEquals(NodeState.DECOMMISSIONED, node.getState());
|
||||||
|
Assert.assertEquals("Active Nodes", initialActive, cm.getNumActiveNMs());
|
||||||
|
Assert.assertEquals("Decommissioning Nodes", initialDecommissioning - 1,
|
||||||
|
cm.getNumDecommissioningNMs());
|
||||||
|
Assert.assertEquals("Decommissioned Nodes", initialDecommissioned + 1,
|
||||||
|
cm.getNumDecommisionedNMs());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -846,17 +892,26 @@ public void testResourceUpdateOnNewNode() {
|
|||||||
@Test
|
@Test
|
||||||
public void testResourceUpdateOnRebootedNode() {
|
public void testResourceUpdateOnRebootedNode() {
|
||||||
RMNodeImpl node = getRebootedNode();
|
RMNodeImpl node = getRebootedNode();
|
||||||
|
ClusterMetrics cm = ClusterMetrics.getMetrics();
|
||||||
|
int initialActive = cm.getNumActiveNMs();
|
||||||
|
int initialUnHealthy = cm.getUnhealthyNMs();
|
||||||
|
int initialDecommissioning = cm.getNumDecommissioningNMs();
|
||||||
Resource oldCapacity = node.getTotalCapability();
|
Resource oldCapacity = node.getTotalCapability();
|
||||||
assertEquals("Memory resource is not match.", oldCapacity.getMemory(), 4096);
|
assertEquals("Memory resource is not match.", oldCapacity.getMemory(), 4096);
|
||||||
assertEquals("CPU resource is not match.", oldCapacity.getVirtualCores(), 4);
|
assertEquals("CPU resource is not match.", oldCapacity.getVirtualCores(), 4);
|
||||||
node.handle(new RMNodeResourceUpdateEvent(node.getNodeID(),
|
node.handle(new RMNodeResourceUpdateEvent(node.getNodeID(), ResourceOption
|
||||||
ResourceOption.newInstance(Resource.newInstance(2048, 2),
|
.newInstance(Resource.newInstance(2048, 2),
|
||||||
ResourceOption.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT)));
|
ResourceOption.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT)));
|
||||||
Resource newCapacity = node.getTotalCapability();
|
Resource newCapacity = node.getTotalCapability();
|
||||||
assertEquals("Memory resource is not match.", newCapacity.getMemory(), 2048);
|
assertEquals("Memory resource is not match.", newCapacity.getMemory(), 2048);
|
||||||
assertEquals("CPU resource is not match.", newCapacity.getVirtualCores(), 2);
|
assertEquals("CPU resource is not match.", newCapacity.getVirtualCores(), 2);
|
||||||
|
|
||||||
Assert.assertEquals(NodeState.REBOOTED, node.getState());
|
Assert.assertEquals(NodeState.REBOOTED, node.getState());
|
||||||
|
Assert.assertEquals("Active Nodes", initialActive, cm.getNumActiveNMs());
|
||||||
|
Assert.assertEquals("Unhelathy Nodes", initialUnHealthy,
|
||||||
|
cm.getUnhealthyNMs());
|
||||||
|
Assert.assertEquals("Decommissioning Nodes", initialDecommissioning,
|
||||||
|
cm.getNumDecommissioningNMs());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test unhealthy report on a decommissioning node will make it
|
// Test unhealthy report on a decommissioning node will make it
|
||||||
|
@ -47,7 +47,7 @@ public class TestNodesPage {
|
|||||||
|
|
||||||
// Number of Actual Table Headers for NodesPage.NodesBlock might change in
|
// Number of Actual Table Headers for NodesPage.NodesBlock might change in
|
||||||
// future. In that case this value should be adjusted to the new value.
|
// future. In that case this value should be adjusted to the new value.
|
||||||
final int numberOfThInMetricsTable = 22;
|
final int numberOfThInMetricsTable = 23;
|
||||||
final int numberOfActualTableHeaders = 13;
|
final int numberOfActualTableHeaders = 13;
|
||||||
|
|
||||||
private Injector injector;
|
private Injector injector;
|
||||||
|
@ -429,7 +429,7 @@ public void verifyClusterMetricsJSON(JSONObject json) throws JSONException,
|
|||||||
Exception {
|
Exception {
|
||||||
assertEquals("incorrect number of elements", 1, json.length());
|
assertEquals("incorrect number of elements", 1, json.length());
|
||||||
JSONObject clusterinfo = json.getJSONObject("clusterMetrics");
|
JSONObject clusterinfo = json.getJSONObject("clusterMetrics");
|
||||||
assertEquals("incorrect number of elements", 24, clusterinfo.length());
|
assertEquals("incorrect number of elements", 25, clusterinfo.length());
|
||||||
verifyClusterMetrics(
|
verifyClusterMetrics(
|
||||||
clusterinfo.getInt("appsSubmitted"), clusterinfo.getInt("appsCompleted"),
|
clusterinfo.getInt("appsSubmitted"), clusterinfo.getInt("appsCompleted"),
|
||||||
clusterinfo.getInt("reservedMB"), clusterinfo.getInt("availableMB"),
|
clusterinfo.getInt("reservedMB"), clusterinfo.getInt("availableMB"),
|
||||||
|
Loading…
Reference in New Issue
Block a user