From 3cd02b95224e9d43fd63a4ef9ac5c44f113f710d Mon Sep 17 00:00:00 2001 From: Jonathan Eagles Date: Mon, 3 Aug 2015 10:38:05 -0500 Subject: [PATCH] YARN-3978. Configurably turn off the saving of container info in Generic AHS (Eric Payne via jeagles) --- hadoop-yarn-project/CHANGES.txt | 3 + .../hadoop/yarn/conf/YarnConfiguration.java | 9 +++ .../hadoop/yarn/server/webapp/AppBlock.java | 12 +-- .../rmcontainer/RMContainerImpl.java | 43 +++++++++- .../resourcemanager/TestClientRMService.java | 1 + .../rmcontainer/TestRMContainerImpl.java | 79 ++++++++++++++++++- .../capacity/TestChildQueueOrder.java | 1 + .../scheduler/capacity/TestLeafQueue.java | 2 + .../scheduler/capacity/TestReservations.java | 6 +- .../scheduler/fifo/TestFifoScheduler.java | 2 + 10 files changed, 146 insertions(+), 12 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 61b3cce32c..cd8cbd3b24 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -733,6 +733,9 @@ Release 2.7.2 - UNRELEASED YARN-3967. Fetch the application report from the AHS if the RM does not know about it. (Mit Desai via xgong) + YARN-3978. 
Configurably turn off the saving of container info in Generic AHS + (Eric Payne via jeagles) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 98327296df..f1baf5c6f3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -1435,6 +1435,15 @@ private static void addDeprecatedKeys() { public static final String APPLICATION_HISTORY_STORE = APPLICATION_HISTORY_PREFIX + "store-class"; + /** Whether to save meta-info for non-AM containers in the application history store; AM container meta-info is saved regardless. */ + @Private + public static final String + APPLICATION_HISTORY_SAVE_NON_AM_CONTAINER_META_INFO = + APPLICATION_HISTORY_PREFIX + "save-non-am-container-meta-info"; + @Private + public static final boolean + DEFAULT_APPLICATION_HISTORY_SAVE_NON_AM_CONTAINER_META_INFO = true; + /** URI for FileSystemApplicationHistoryStore */ @Private public static final String FS_APPLICATION_HISTORY_STORE_URI = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/AppBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/AppBlock.java index eec32b29d9..871bac3596 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/AppBlock.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/AppBlock.java @@ -266,11 +266,13 @@ protected void generateApplicationTable(Block html, @Override public ContainerReport 
run() throws Exception { ContainerReport report = null; - try { - report = appBaseProt.getContainerReport(request) - .getContainerReport(); - } catch (ContainerNotFoundException ex) { - LOG.warn(ex.getMessage()); + if (request.getContainerId() != null) { + try { + report = appBaseProt.getContainerReport(request) + .getContainerReport(); + } catch (ContainerNotFoundException ex) { + LOG.warn(ex.getMessage()); + } } return report; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java index 940f76f1cb..a3d8beea56 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java @@ -38,6 +38,7 @@ import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; @@ -173,6 +174,8 @@ public RMContainerImpl(Container container, .currentTimeMillis(), ""); } + private boolean saveNonAMContainerMetaInfo; + public RMContainerImpl(Container container, ApplicationAttemptId appAttemptId, NodeId nodeId, String user, RMContext rmContext, String nodeLabelExpression) { @@ -201,9 +204,21 @@ public RMContainerImpl(Container 
container, this.readLock = lock.readLock(); this.writeLock = lock.writeLock(); + saveNonAMContainerMetaInfo = rmContext.getYarnConfiguration().getBoolean( + YarnConfiguration.APPLICATION_HISTORY_SAVE_NON_AM_CONTAINER_META_INFO, + YarnConfiguration + .DEFAULT_APPLICATION_HISTORY_SAVE_NON_AM_CONTAINER_META_INFO); + rmContext.getRMApplicationHistoryWriter().containerStarted(this); - rmContext.getSystemMetricsPublisher().containerCreated( - this, this.creationTime); + + // If saveNonAMContainerMetaInfo is true, store system metrics for all + // containers. If false, and if this container is marked as the AM, metrics + // will still be published for this container, but that calculation happens + // later. + if (saveNonAMContainerMetaInfo) { + rmContext.getSystemMetricsPublisher().containerCreated( + this, this.creationTime); + } } @Override @@ -376,6 +391,15 @@ public void setAMContainer(boolean isAMContainer) { } finally { writeLock.unlock(); } + + // Even if saveNonAMContainerMetaInfo is not true, the AM container's system + // metrics still need to be saved so that the AM's logs can be accessed. + // This call to getSystemMetricsPublisher().containerCreated() is mutually + // exclusive with the one in the RMContainerImpl constructor. 
+ if (!saveNonAMContainerMetaInfo && this.isAMContainer) { + rmContext.getSystemMetricsPublisher().containerCreated( + this, this.creationTime); + } } @Override @@ -516,8 +540,19 @@ public void transition(RMContainerImpl container, RMContainerEvent event) { container.rmContext.getRMApplicationHistoryWriter().containerFinished( container); - container.rmContext.getSystemMetricsPublisher().containerFinished( - container, container.finishTime); + + boolean saveNonAMContainerMetaInfo = + container.rmContext.getYarnConfiguration().getBoolean( + YarnConfiguration + .APPLICATION_HISTORY_SAVE_NON_AM_CONTAINER_META_INFO, + YarnConfiguration + .DEFAULT_APPLICATION_HISTORY_SAVE_NON_AM_CONTAINER_META_INFO); + + if (saveNonAMContainerMetaInfo || container.isAMContainer()) { + container.rmContext.getSystemMetricsPublisher().containerFinished( + container, container.finishTime); + } + } private static void updateAttemptMetrics(RMContainerImpl container) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java index 20343a5185..b9e1d811f5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java @@ -1172,6 +1172,7 @@ private void mockRMContext(YarnScheduler yarnScheduler, RMContext rmContext) when(rmContext.getRMApplicationHistoryWriter()).thenReturn(writer); SystemMetricsPublisher publisher = mock(SystemMetricsPublisher.class); 
when(rmContext.getSystemMetricsPublisher()).thenReturn(publisher); + when(rmContext.getYarnConfiguration()).thenReturn(new YarnConfiguration()); ConcurrentHashMap apps = getRMApps(rmContext, yarnScheduler); when(rmContext.getRMApps()).thenReturn(apps); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/TestRMContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/TestRMContainerImpl.java index 21aba3bfec..e4e2049d4d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/TestRMContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/TestRMContainerImpl.java @@ -19,12 +19,14 @@ package org.apache.hadoop.yarn.server.resourcemanager.rmcontainer; import static org.junit.Assert.assertEquals; -import static org.mockito.Mockito.any; -import static org.mockito.Mockito.anyLong; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyLong; +import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.reset; import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -270,4 +272,77 @@ public void testExistenceOfResourceRequestInRMContainer() throws Exception { Assert.assertNull(scheduler.getRMContainer(containerId2) .getResourceRequests()); } + + @Test (timeout = 180000) + public void testStoreAllContainerMetrics() throws Exception { + Configuration conf = new Configuration(); 
+ conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1); + MockRM rm1 = new MockRM(conf); + + SystemMetricsPublisher publisher = mock(SystemMetricsPublisher.class); + rm1.getRMContext().setSystemMetricsPublisher(publisher); + + rm1.start(); + MockNM nm1 = rm1.registerNode("unknownhost:1234", 8000); + RMApp app1 = rm1.submitApp(1024); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 1, ContainerState.RUNNING); + + // request a container. + am1.allocate("127.0.0.1", 1024, 1, new ArrayList()); + ContainerId containerId2 = ContainerId.newContainerId( + am1.getApplicationAttemptId(), 2); + rm1.waitForState(nm1, containerId2, RMContainerState.ALLOCATED); + am1.allocate(new ArrayList(), new ArrayList()) + .getAllocatedContainers(); + rm1.waitForState(nm1, containerId2, RMContainerState.ACQUIRED); + nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 2, ContainerState.RUNNING); + nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 2, ContainerState.COMPLETE); + nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 1, ContainerState.COMPLETE); + rm1.waitForState(nm1, containerId2, RMContainerState.COMPLETED); + rm1.stop(); + + // RMContainer should be publishing system metrics for all containers. + // Since there is 1 AM container and 1 non-AM container, there should be 2 + // container created events and 2 container finished events. 
+ verify(publisher, times(2)).containerCreated(any(RMContainer.class), anyLong()); + verify(publisher, times(2)).containerFinished(any(RMContainer.class), anyLong()); + } + + @Test (timeout = 180000) + public void testStoreOnlyAMContainerMetrics() throws Exception { + Configuration conf = new Configuration(); + conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1); + conf.setBoolean( + YarnConfiguration.APPLICATION_HISTORY_SAVE_NON_AM_CONTAINER_META_INFO, + false); + MockRM rm1 = new MockRM(conf); + + SystemMetricsPublisher publisher = mock(SystemMetricsPublisher.class); + rm1.getRMContext().setSystemMetricsPublisher(publisher); + + rm1.start(); + MockNM nm1 = rm1.registerNode("unknownhost:1234", 8000); + RMApp app1 = rm1.submitApp(1024); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 1, ContainerState.RUNNING); + + // request a container. + am1.allocate("127.0.0.1", 1024, 1, new ArrayList()); + ContainerId containerId2 = ContainerId.newContainerId( + am1.getApplicationAttemptId(), 2); + rm1.waitForState(nm1, containerId2, RMContainerState.ALLOCATED); + am1.allocate(new ArrayList(), new ArrayList()) + .getAllocatedContainers(); + rm1.waitForState(nm1, containerId2, RMContainerState.ACQUIRED); + nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 2, ContainerState.RUNNING); + nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 2, ContainerState.COMPLETE); + nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 1, ContainerState.COMPLETE); + rm1.waitForState(nm1, containerId2, RMContainerState.COMPLETED); + rm1.stop(); + + // RMContainer should be publishing system metrics only for AM container. 
+ verify(publisher, times(1)).containerCreated(any(RMContainer.class), anyLong()); + verify(publisher, times(1)).containerFinished(any(RMContainer.class), anyLong()); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestChildQueueOrder.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestChildQueueOrder.java index 31661da810..295a31a0b5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestChildQueueOrder.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestChildQueueOrder.java @@ -270,6 +270,7 @@ public void testSortedQueues() throws Exception { when(rmContext.getDispatcher()).thenReturn(drainDispatcher); when(rmContext.getRMApplicationHistoryWriter()).thenReturn(writer); when(rmContext.getSystemMetricsPublisher()).thenReturn(publisher); + when(rmContext.getYarnConfiguration()).thenReturn(new YarnConfiguration()); ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId( app_0.getApplicationId(), 1); ContainerId containerId = BuilderUtils.newContainerId(appAttemptId, 1); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java index d225bd049f..d63130088f 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java @@ -168,6 +168,8 @@ public void setUp() throws Exception { cs.start(); when(spyRMContext.getScheduler()).thenReturn(cs); + when(spyRMContext.getYarnConfiguration()) + .thenReturn(new YarnConfiguration()); when(cs.getNumClusterNodes()).thenReturn(3); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservations.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservations.java index fff4a8645d..66ad3a82e7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservations.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservations.java @@ -129,7 +129,9 @@ private void setup(CapacitySchedulerConfiguration csConf) throws Exception { spyRMContext = spy(rmContext); when(spyRMContext.getScheduler()).thenReturn(cs); - + when(spyRMContext.getYarnConfiguration()) + .thenReturn(new YarnConfiguration()); + cs.setRMContext(spyRMContext); cs.init(csConf); cs.start(); @@ -642,6 +644,7 @@ public void testGetAppToUnreserve() throws Exception { when(rmContext.getDispatcher()).thenReturn(drainDispatcher); when(rmContext.getRMApplicationHistoryWriter()).thenReturn(writer); 
when(rmContext.getSystemMetricsPublisher()).thenReturn(publisher); + when(rmContext.getYarnConfiguration()).thenReturn(new YarnConfiguration()); ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId( app_0.getApplicationId(), 1); ContainerId containerId = BuilderUtils.newContainerId(appAttemptId, 1); @@ -711,6 +714,7 @@ public void testFindNodeToUnreserve() throws Exception { when(rmContext.getDispatcher()).thenReturn(drainDispatcher); when(rmContext.getRMApplicationHistoryWriter()).thenReturn(writer); when(rmContext.getSystemMetricsPublisher()).thenReturn(publisher); + when(rmContext.getYarnConfiguration()).thenReturn(new YarnConfiguration()); ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId( app_0.getApplicationId(), 1); ContainerId containerId = BuilderUtils.newContainerId(appAttemptId, 1); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java index a454801ca3..5f9030fd66 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java @@ -222,6 +222,7 @@ public void testNodeLocalAssignment() throws Exception { scheduler); ((RMContextImpl) rmContext).setSystemMetricsPublisher( mock(SystemMetricsPublisher.class)); + ((RMContextImpl) rmContext).setYarnConfiguration(new YarnConfiguration()); scheduler.setRMContext(rmContext); scheduler.init(conf); @@ -303,6 +304,7 @@ public Map 
getNodes(){ scheduler); ((RMContextImpl) rmContext).setSystemMetricsPublisher( mock(SystemMetricsPublisher.class)); + ((RMContextImpl) rmContext).setYarnConfiguration(new YarnConfiguration()); NullRMNodeLabelsManager nlm = new NullRMNodeLabelsManager(); nlm.init(new Configuration()); rmContext.setNodeLabelManager(nlm);