diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java index a1c474f958..6774cf0e5d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java @@ -35,6 +35,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager; +import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService; import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerAllocator; import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager; @@ -125,4 +126,6 @@ public interface Context { ContainerStateTransitionListener getContainerStateTransitionListener(); ResourcePluginManager getResourcePluginManager(); + + NodeManagerMetrics getNodeManagerMetrics(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index bddc7c34ee..179b01e607 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -205,7 +205,7 @@ protected NodeLabelsProvider createNodeLabelsProvider(Configuration conf) } protected NodeResourceMonitor createNodeResourceMonitor() { - return new NodeResourceMonitorImpl(); + return new NodeResourceMonitorImpl(context); } protected ContainerManagerImpl createContainerManager(Context context, @@ -242,6 +242,7 @@ protected NMContext createNMContext( NMContext nmContext = new NMContext(containerTokenSecretManager, nmTokenSecretManager, dirsHandler, aclsManager, stateStore, isDistSchedulerEnabled, conf); + nmContext.setNodeManagerMetrics(metrics); DefaultContainerStateListener defaultListener = new DefaultContainerStateListener(); nmContext.setContainerStateTransitionListener(defaultListener); @@ -574,6 +575,8 @@ public static class NMContext implements Context { private Configuration conf = null; + private NodeManagerMetrics metrics = null; + protected final ConcurrentMap applications = new ConcurrentHashMap(); @@ -823,6 +826,20 @@ public ResourcePluginManager getResourcePluginManager() { return resourcePluginManager; } + /** + * Returns the {@link NodeManagerMetrics} instance of this node. + * This might return a null if the instance was not set to the context. + * @return node manager metrics. + */ + @Override + public NodeManagerMetrics getNodeManagerMetrics() { + return metrics; + } + + public void setNodeManagerMetrics(NodeManagerMetrics nmMetrics) { + this.metrics = nmMetrics; + } + public void setResourcePluginManager( ResourcePluginManager resourcePluginManager) { this.resourcePluginManager = resourcePluginManager; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java index 8b96ba5874..d489835586 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java @@ -22,6 +22,7 @@ import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.api.records.ResourceUtilization; +import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -48,12 +49,14 @@ public class NodeResourceMonitorImpl extends AbstractService implements /** Current resource utilization of the node. */ private ResourceUtilization nodeUtilization; + private Context nmContext; + /** * Initialize the node resource monitor. */ - public NodeResourceMonitorImpl() { + public NodeResourceMonitorImpl(Context context) { super(NodeResourceMonitorImpl.class.getName()); - + this.nmContext = context; this.monitoringThread = new MonitoringThread(); } @@ -149,6 +152,15 @@ public void run() { (int) (vmem >> 20), // B -> MB vcores); // Used Virtual Cores + // Publish the node utilization metrics to node manager + // metrics system. + NodeManagerMetrics nmMetrics = nmContext.getNodeManagerMetrics(); + if (nmMetrics != null) { + nmMetrics.setNodeUsedMemGB(nodeUtilization.getPhysicalMemory()); + nmMetrics.setNodeUsedVMemGB(nodeUtilization.getVirtualMemory()); + nmMetrics.setNodeCpuUtilization(nodeUtilization.getCPU()); + } + try { Thread.sleep(monitoringInterval); } catch (InterruptedException e) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index 2b99cc7254..48ec147aef 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -20,6 +20,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; @@ -488,6 +489,18 @@ public void run() { // Save the aggregated utilization of the containers setContainersUtilization(trackedContainersUtilization); + // Publish the container utilization metrics to node manager + // metrics system. + NodeManagerMetrics nmMetrics = context.getNodeManagerMetrics(); + if (nmMetrics != null) { + nmMetrics.setContainerUsedMemGB( + trackedContainersUtilization.getPhysicalMemory()); + nmMetrics.setContainerUsedVMemGB( + trackedContainersUtilization.getVirtualMemory()); + nmMetrics.setContainerCpuUtilization( + trackedContainersUtilization.getCPU()); + } + try { Thread.sleep(monitoringInterval); } catch (InterruptedException e) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java index a59bb5c609..f0abfd4ff7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java @@ -24,6 +24,7 @@ import org.apache.hadoop.metrics2.lib.MutableCounterInt; import org.apache.hadoop.metrics2.lib.MutableGaugeInt; import org.apache.hadoop.metrics2.lib.MutableGaugeLong; +import org.apache.hadoop.metrics2.lib.MutableGaugeFloat; import org.apache.hadoop.metrics2.lib.MutableRate; import org.apache.hadoop.metrics2.source.JvmMetrics; import org.apache.hadoop.yarn.api.records.Resource; @@ -77,6 +78,18 @@ public class NodeManagerMetrics { MutableGaugeLong publicBytesDeleted; @Metric("# of bytes deleted from the private local cache") MutableGaugeLong privateBytesDeleted; + @Metric("Current used physical memory by all containers in GB") + MutableGaugeInt containerUsedMemGB; + @Metric("Current used virtual memory by all containers in GB") + MutableGaugeInt containerUsedVMemGB; + @Metric("Aggregated CPU utilization of all containers") + MutableGaugeFloat containerCpuUtilization; + @Metric("Current used memory by this node in GB") + MutableGaugeInt nodeUsedMemGB; + @Metric("Current used virtual memory by this node in GB") + MutableGaugeInt nodeUsedVMemGB; + @Metric("Current CPU utilization") + MutableGaugeFloat nodeCpuUtilization; // CHECKSTYLE:ON:VisibilityModifier @@ -316,4 +329,52 @@ public long getPublicBytesDeleted() { public long getPrivateBytesDeleted() { return this.privateBytesDeleted.value(); } + + public void setContainerUsedMemGB(long usedMem) { + this.containerUsedMemGB.set((int)Math.floor(usedMem/1024d)); + } + + public int getContainerUsedMemGB() { + return this.containerUsedMemGB.value(); + } + + public void setContainerUsedVMemGB(long usedVMem) { + this.containerUsedVMemGB.set((int)Math.floor(usedVMem/1024d)); + } + + public int getContainerUsedVMemGB() { + return this.containerUsedVMemGB.value(); + } + + public void setContainerCpuUtilization(float cpuUtilization) { + this.containerCpuUtilization.set(cpuUtilization); + } + + public float getContainerCpuUtilization() { + return this.containerCpuUtilization.value(); + } + + public void setNodeUsedMemGB(long totalUsedMemGB) { + this.nodeUsedMemGB.set((int)Math.floor(totalUsedMemGB/1024d)); + } + + public int getNodeUsedMemGB() { + return nodeUsedMemGB.value(); + } + + public void setNodeUsedVMemGB(long totalUsedVMemGB) { + this.nodeUsedVMemGB.set((int)Math.floor(totalUsedVMemGB/1024d)); + } + + public int getNodeUsedVMemGB() { + return nodeUsedVMemGB.value(); + } + + public float getNodeCpuUtilization() { + return nodeCpuUtilization.value(); + } + + public void setNodeCpuUtilization(float cpuUtilization) { + this.nodeCpuUtilization.set(cpuUtilization); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeResourceMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeResourceMonitor.java index 3c2c3860ee..000e8eaf56 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeResourceMonitor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeResourceMonitor.java @@ -18,18 +18,46 @@ package org.apache.hadoop.yarn.server.nodemanager; -import org.apache.hadoop.fs.UnsupportedFileSystemException; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest; +import java.io.IOException; +import org.apache.hadoop.fs.UnsupportedFileSystemException; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.nodemanager.containermanager + .BaseContainerManagerTest; +import org.apache.hadoop.yarn.server.nodemanager.containermanager + .monitor.MockResourceCalculatorPlugin; + +import org.junit.Before; import org.junit.Test; +import org.mockito.Mockito; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.timeout; public class TestNodeResourceMonitor extends BaseContainerManagerTest { public TestNodeResourceMonitor() throws UnsupportedFileSystemException { super(); } + @Before + public void setup() throws IOException { + // Enable node resource monitor with a mocked resource calculator. + conf.set( + YarnConfiguration.NM_MON_RESOURCE_CALCULATOR, + MockResourceCalculatorPlugin.class.getCanonicalName()); + super.setup(); + } + @Test - public void testNodeResourceMonitor() { - NodeResourceMonitor nrm = new NodeResourceMonitorImpl(); + public void testMetricsUpdate() throws Exception { + // This test doesn't verify the correction of those metrics + // updated by the monitor, it only verifies that the monitor + // do publish these info to node manager metrics system in + // each monitor interval. + Context spyContext = spy(context); + NodeResourceMonitor nrm = new NodeResourceMonitorImpl(spyContext); + nrm.init(conf); + nrm.start(); + Mockito.verify(spyContext, timeout(500).atLeastOnce()) + .getNodeManagerMetrics(); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java index 0319dbe1bf..18d1dcef4c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java @@ -55,6 +55,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePluginManager; +import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMMemoryStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredAMRMProxyState; @@ -802,5 +803,10 @@ public ContainerExecutor getContainerExecutor() { public ResourcePluginManager getResourcePluginManager() { return null; } + + @Override + public NodeManagerMetrics getNodeManagerMetrics() { + return null; + } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorPlugin.java index 0dc5c5b3ce..64d117a5a5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorPlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorPlugin.java @@ -70,4 +70,9 @@ public long getCumulativeCpuTime() { public float getCpuUsagePercentage() { return 0; } + + @Override + public float getNumVCoresUsed() { + return 0; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java index 6f7fadfa05..5f72a4c25a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java @@ -22,6 +22,8 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.timeout; import java.io.BufferedReader; import java.io.File; @@ -67,7 +69,6 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerSignalContext; import org.apache.hadoop.yarn.server.utils.BuilderUtils; -import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.LinuxResourceCalculatorPlugin; import org.apache.hadoop.yarn.util.ProcfsBasedProcessTree; import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin; @@ -75,6 +76,7 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import org.mockito.Mockito; import org.slf4j.LoggerFactory; public class TestContainersMonitor extends BaseContainerManagerTest { @@ -95,6 +97,22 @@ public void setup() throws IOException { super.setup(); } + @Test + public void testMetricsUpdate() throws Exception { + // This test doesn't verify the correction of those metrics + // updated by the monitor, it only verifies that the monitor + // do publish these info to node manager metrics system in + // each monitor interval. + Context spyContext = spy(context); + ContainersMonitorImpl cm = + new ContainersMonitorImpl(mock(ContainerExecutor.class), + mock(AsyncDispatcher.class), spyContext); + cm.init(getConfForCM(false, true, 1024, 2.1f)); + cm.start(); + Mockito.verify(spyContext, timeout(500).atLeastOnce()) + .getNodeManagerMetrics(); + } + /** * Test to verify the check for whether a process tree is over limit or not. *