YARN-4308. ContainersAggregated CPU resource utilization reports negative usage in first few heartbeats. Contributed by Sunil G
This commit is contained in:
parent
ae047655f4
commit
1500a0a300
@ -467,6 +467,14 @@ private BigInteger getTotalProcessJiffies() {
|
|||||||
return totalStime.add(BigInteger.valueOf(totalUtime));
|
return totalStime.add(BigInteger.valueOf(totalUtime));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the CPU usage by all the processes in the process-tree in Unix.
|
||||||
|
* Note: UNAVAILABLE will be returned in case when CPU usage is not
|
||||||
|
* available. It is NOT advised to return any other error code.
|
||||||
|
*
|
||||||
|
* @return percentage CPU usage since the process-tree was created,
|
||||||
|
* {@link #UNAVAILABLE} if CPU usage cannot be calculated or not available.
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public float getCpuUsagePercent() {
|
public float getCpuUsagePercent() {
|
||||||
BigInteger processTotalJiffies = getTotalProcessJiffies();
|
BigInteger processTotalJiffies = getTotalProcessJiffies();
|
||||||
|
@ -187,9 +187,11 @@ public long getCumulativeCpuTime() {
|
|||||||
* Get the CPU usage by all the processes in the process-tree based on
|
* Get the CPU usage by all the processes in the process-tree based on
|
||||||
* average between samples as a ratio of overall CPU cycles similar to top.
|
* average between samples as a ratio of overall CPU cycles similar to top.
|
||||||
* Thus, if 2 out of 4 cores are used this should return 200.0.
|
* Thus, if 2 out of 4 cores are used this should return 200.0.
|
||||||
|
* Note: UNAVAILABLE will be returned in case when CPU usage is not
|
||||||
|
* available. It is NOT advised to return any other error code.
|
||||||
*
|
*
|
||||||
* @return percentage CPU usage since the process-tree was created,
|
* @return percentage CPU usage since the process-tree was created,
|
||||||
* {@link #UNAVAILABLE} if it cannot be calculated.
|
* {@link #UNAVAILABLE} if CPU usage cannot be calculated or not available.
|
||||||
*/
|
*/
|
||||||
public float getCpuUsagePercent() {
|
public float getCpuUsagePercent() {
|
||||||
return UNAVAILABLE;
|
return UNAVAILABLE;
|
||||||
|
@ -268,6 +268,14 @@ private BigInteger getTotalProcessMs() {
|
|||||||
return BigInteger.valueOf(totalMs);
|
return BigInteger.valueOf(totalMs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the CPU usage by all the processes in the process-tree in Windows.
|
||||||
|
* Note: UNAVAILABLE will be returned in case when CPU usage is not
|
||||||
|
* available. It is NOT advised to return any other error code.
|
||||||
|
*
|
||||||
|
* @return percentage CPU usage since the process-tree was created,
|
||||||
|
* {@link #UNAVAILABLE} if CPU usage cannot be calculated or not available.
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public float getCpuUsagePercent() {
|
public float getCpuUsagePercent() {
|
||||||
BigInteger processTotalMs = getTotalProcessMs();
|
BigInteger processTotalMs = getTotalProcessMs();
|
||||||
|
@ -455,6 +455,15 @@ public void run() {
|
|||||||
// cpuUsagePercentPerCore should be 300% and
|
// cpuUsagePercentPerCore should be 300% and
|
||||||
// cpuUsageTotalCoresPercentage should be 50%
|
// cpuUsageTotalCoresPercentage should be 50%
|
||||||
float cpuUsagePercentPerCore = pTree.getCpuUsagePercent();
|
float cpuUsagePercentPerCore = pTree.getCpuUsagePercent();
|
||||||
|
if (cpuUsagePercentPerCore < 0) {
|
||||||
|
// CPU usage is not available likely because the container just
|
||||||
|
// started. Let us skip this turn and consider this container
|
||||||
|
// in the next iteration.
|
||||||
|
LOG.info("Skipping monitoring container " + containerId
|
||||||
|
+ " since CPU usage is not yet available.");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
float cpuUsageTotalCoresPercentage = cpuUsagePercentPerCore /
|
float cpuUsageTotalCoresPercentage = cpuUsagePercentPerCore /
|
||||||
resourceCalculatorPlugin.getNumProcessors();
|
resourceCalculatorPlugin.getNumProcessors();
|
||||||
|
|
||||||
|
@ -0,0 +1,70 @@
|
|||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
|
||||||
|
|
||||||
|
import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Mock class to obtain resource usage (CPU).
|
||||||
|
*/
|
||||||
|
public class MockCPUResourceCalculatorProcessTree
|
||||||
|
extends ResourceCalculatorProcessTree {
|
||||||
|
|
||||||
|
private long cpuPercentage = ResourceCalculatorProcessTree.UNAVAILABLE;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor for MockCPUResourceCalculatorProcessTree with specified root
|
||||||
|
* process.
|
||||||
|
* @param root the root process of the process tree
|
||||||
|
*/
|
||||||
|
public MockCPUResourceCalculatorProcessTree(String root) {
|
||||||
|
super(root);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void updateProcessTree() {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getProcessTreeDump() {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getCumulativeCpuTime() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean checkPidPgrpidForMatch() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float getCpuUsagePercent() {
|
||||||
|
long cpu = this.cpuPercentage;
|
||||||
|
// The first getter call returns -1 (UNAVAILABLE), and other calls will
|
||||||
|
// return non-zero value as defined below.
|
||||||
|
if (cpu == ResourceCalculatorProcessTree.UNAVAILABLE) {
|
||||||
|
// Set a default value other than 0 for test.
|
||||||
|
this.cpuPercentage = 50;
|
||||||
|
}
|
||||||
|
return cpu;
|
||||||
|
}
|
||||||
|
}
|
@ -54,4 +54,9 @@ public void setRssMemorySize(long rssMemorySize) {
|
|||||||
public long getRssMemorySize() {
|
public long getRssMemorySize() {
|
||||||
return this.rssMemorySize;
|
return this.rssMemorySize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float getCpuUsagePercent() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -27,8 +27,8 @@
|
|||||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||||
import org.apache.hadoop.yarn.api.records.ExecutionType;
|
|
||||||
import org.apache.hadoop.yarn.api.records.Resource;
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ResourceUtilization;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.event.AsyncDispatcher;
|
import org.apache.hadoop.yarn.event.AsyncDispatcher;
|
||||||
import org.apache.hadoop.yarn.event.EventHandler;
|
import org.apache.hadoop.yarn.event.EventHandler;
|
||||||
@ -43,19 +43,21 @@
|
|||||||
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext;
|
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.executor.DeletionAsUserContext;
|
import org.apache.hadoop.yarn.server.nodemanager.executor.DeletionAsUserContext;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.executor.LocalizerStartContext;
|
import org.apache.hadoop.yarn.server.nodemanager.executor.LocalizerStartContext;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.mockito.Mockito;
|
import org.mockito.Mockito;
|
||||||
|
|
||||||
import static org.junit.Assert.assertNotNull;
|
import static org.junit.Assert.assertNotNull;
|
||||||
import static org.junit.Assert.assertNull;
|
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
import static org.junit.Assert.assertFalse;
|
import static org.junit.Assert.assertFalse;
|
||||||
|
|
||||||
public class TestContainersMonitorResourceChange {
|
public class TestContainersMonitorResourceChange {
|
||||||
|
|
||||||
|
static final Logger LOG = Logger
|
||||||
|
.getLogger(TestContainersMonitorResourceChange.class);
|
||||||
private ContainersMonitorImpl containersMonitor;
|
private ContainersMonitorImpl containersMonitor;
|
||||||
private MockExecutor executor;
|
private MockExecutor executor;
|
||||||
private Configuration conf;
|
private Configuration conf;
|
||||||
@ -63,6 +65,8 @@ public class TestContainersMonitorResourceChange {
|
|||||||
private Context context;
|
private Context context;
|
||||||
private MockContainerEventHandler containerEventHandler;
|
private MockContainerEventHandler containerEventHandler;
|
||||||
|
|
||||||
|
static final int WAIT_MS_PER_LOOP = 20; // 20 milli seconds
|
||||||
|
|
||||||
private static class MockExecutor extends ContainerExecutor {
|
private static class MockExecutor extends ContainerExecutor {
|
||||||
@Override
|
@Override
|
||||||
public void init() throws IOException {
|
public void init() throws IOException {
|
||||||
@ -232,6 +236,60 @@ public void testContainersResourceChangeIsTriggeredImmediately()
|
|||||||
containersMonitor.stop();
|
containersMonitor.stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testContainersCPUResourceForDefaultValue() throws Exception {
|
||||||
|
Configuration newConf = new Configuration(conf);
|
||||||
|
// set container monitor interval to be 20ms
|
||||||
|
newConf.setLong(YarnConfiguration.NM_CONTAINER_MON_INTERVAL_MS, 20L);
|
||||||
|
containersMonitor = createContainersMonitor(executor, dispatcher, context);
|
||||||
|
newConf.set(YarnConfiguration.NM_CONTAINER_MON_PROCESS_TREE,
|
||||||
|
MockCPUResourceCalculatorProcessTree.class.getCanonicalName());
|
||||||
|
// set container monitor interval to be 20ms
|
||||||
|
containersMonitor.init(newConf);
|
||||||
|
containersMonitor.start();
|
||||||
|
|
||||||
|
// create container 1
|
||||||
|
containersMonitor.handle(new ContainerStartMonitoringEvent(
|
||||||
|
getContainerId(1), 2100L, 1000L, 1, 0, 0));
|
||||||
|
|
||||||
|
// Verify the container utilization value.
|
||||||
|
// Since MockCPUResourceCalculatorProcessTree will return a -1 as CPU
|
||||||
|
// utilization, containersUtilization will not be calculated and hence it
|
||||||
|
// will be 0.
|
||||||
|
assertEquals(
|
||||||
|
"Resource utilization must be default with MonitorThread's first run",
|
||||||
|
0, containersMonitor.getContainersUtilization()
|
||||||
|
.compareTo(ResourceUtilization.newInstance(0, 0, 0.0f)));
|
||||||
|
|
||||||
|
// Verify the container utilization value. Since at least one round is done,
|
||||||
|
// we can expect a non-zero value for container utilization as
|
||||||
|
// MockCPUResourceCalculatorProcessTree#getCpuUsagePercent will return 50.
|
||||||
|
waitForContainerResourceUtilizationChange(containersMonitor, 100);
|
||||||
|
|
||||||
|
containersMonitor.stop();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void waitForContainerResourceUtilizationChange(
|
||||||
|
ContainersMonitorImpl containersMonitor, int timeoutMsecs)
|
||||||
|
throws InterruptedException {
|
||||||
|
int timeWaiting = 0;
|
||||||
|
while (0 == containersMonitor.getContainersUtilization()
|
||||||
|
.compareTo(ResourceUtilization.newInstance(0, 0, 0.0f))) {
|
||||||
|
if (timeWaiting >= timeoutMsecs) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG.info(
|
||||||
|
"Monitor thread is waiting for resource utlization change.");
|
||||||
|
Thread.sleep(WAIT_MS_PER_LOOP);
|
||||||
|
timeWaiting += WAIT_MS_PER_LOOP;
|
||||||
|
}
|
||||||
|
|
||||||
|
assertTrue("Resource utilization is not changed from second run onwards",
|
||||||
|
0 != containersMonitor.getContainersUtilization()
|
||||||
|
.compareTo(ResourceUtilization.newInstance(0, 0, 0.0f)));
|
||||||
|
}
|
||||||
|
|
||||||
private ContainersMonitorImpl createContainersMonitor(
|
private ContainersMonitorImpl createContainersMonitor(
|
||||||
ContainerExecutor containerExecutor, AsyncDispatcher dispatcher,
|
ContainerExecutor containerExecutor, AsyncDispatcher dispatcher,
|
||||||
Context context) {
|
Context context) {
|
||||||
|
Loading…
Reference in New Issue
Block a user