YARN-1705. Reset cluster-metrics on transition to standby. (Rohith via kasha)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1579014 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Karthik Kambatla 2014-03-18 19:00:46 +00:00
parent 534a5ee346
commit bfafc1d0ce
4 changed files with 40 additions and 7 deletions

View File

@ -173,6 +173,8 @@ Release 2.4.0 - UNRELEASED
YARN-1846. TestRM#testNMTokenSentForNormalContainer assumes CapacityScheduler.
(Robert Kanter via kasha)
YARN-1705. Reset cluster-metrics on transition to standby. (Rohith via kasha)
IMPROVEMENTS
YARN-1007. Enhance History Reader interface for Containers. (Mayank Bansal via

View File

@ -81,6 +81,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ContainerPreemptEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ContainerPreemptEventType;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.PreemptableResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
@ -840,6 +841,8 @@ void stopActiveServices() throws Exception {
rmContext.getRMNodes().clear();
rmContext.getInactiveRMNodes().clear();
rmContext.getRMApps().clear();
ClusterMetrics.destroy();
QueueMetrics.clearQueueMetrics();
}
}

View File

@ -127,7 +127,7 @@ static QueueMetrics forQueue(String queueName, Queue parent,
}
/**
* Helper method to clear cache - used only for unit tests.
* Helper method to clear cache.
*/
@Private
public synchronized static void clearQueueMetrics() {

View File

@ -32,6 +32,7 @@
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.junit.Before;
import org.junit.Test;
@ -138,32 +139,38 @@ public void testStartAndTransitions() throws IOException {
rm.start();
checkMonitorHealth();
checkStandbyRMFunctionality();
verifyClusterMetrics(0, 0, 0, 0, 0, 0);
// 1. Transition to Standby - must be a no-op
rm.adminService.transitionToStandby(requestInfo);
checkMonitorHealth();
checkStandbyRMFunctionality();
verifyClusterMetrics(0, 0, 0, 0, 0, 0);
// 2. Transition to active
rm.adminService.transitionToActive(requestInfo);
checkMonitorHealth();
checkActiveRMFunctionality();
verifyClusterMetrics(1, 1, 1, 1, 2048, 1);
// 3. Transition to active - no-op
rm.adminService.transitionToActive(requestInfo);
checkMonitorHealth();
checkActiveRMFunctionality();
verifyClusterMetrics(1, 2, 2, 2, 2048, 2);
// 4. Transition to standby
rm.adminService.transitionToStandby(requestInfo);
checkMonitorHealth();
checkStandbyRMFunctionality();
verifyClusterMetrics(0, 0, 0, 0, 0, 0);
// 5. Transition to active to check Active->Standby->Active works
rm.adminService.transitionToActive(requestInfo);
checkMonitorHealth();
checkActiveRMFunctionality();
verifyClusterMetrics(1, 1, 1, 1, 2048, 1);
// 6. Stop the RM. All services should stop and RM should not be ready to
// become active
rm.stop();
@ -367,6 +374,27 @@ public void testHAWithRMHostName() {
fail("Should not throw any exceptions.");
}
}
private void verifyClusterMetrics(int activeNodes, int appsSubmitted,
int appsPending, int containersPending, int availableMB,
int activeApplications) {
QueueMetrics metrics = rm.getResourceScheduler().getRootQueueMetrics();
// verify queue metrics
assertMetric("appsSubmitted", appsSubmitted, metrics.getAppsSubmitted());
assertMetric("appsPending", appsPending, metrics.getAppsPending());
assertMetric("containersPending", containersPending,
metrics.getPendingContainers());
assertMetric("availableMB", availableMB, metrics.getAvailableMB());
assertMetric("activeApplications", activeApplications,
metrics.getActiveApps());
// verify node metric
ClusterMetrics clusterMetrics = ClusterMetrics.getMetrics();
assertMetric("activeNodes", activeNodes, clusterMetrics.getNumActiveNMs());
}
private void assertMetric(String metricName, int expected, int actual) {
assertEquals("Incorrect value for metric " + metricName, expected, actual);
}
@SuppressWarnings("rawtypes")
class MyCountingDispatcher extends AbstractService implements Dispatcher {