YARN-4811. Generate histograms in ContainerMetrics for actual container resource usage
This commit is contained in:
parent
7a021471c3
commit
0dd9bcab97
@ -31,6 +31,7 @@
|
||||
import org.apache.hadoop.metrics2.MetricsInfo;
|
||||
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
|
||||
import org.apache.hadoop.metrics2.util.Quantile;
|
||||
import org.apache.hadoop.metrics2.util.QuantileEstimator;
|
||||
import org.apache.hadoop.metrics2.util.SampleQuantiles;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
@ -54,7 +55,7 @@ public class MutableQuantiles extends MutableMetric {
|
||||
private final MetricsInfo[] quantileInfos;
|
||||
private final int interval;
|
||||
|
||||
private SampleQuantiles estimator;
|
||||
private QuantileEstimator estimator;
|
||||
private long previousCount = 0;
|
||||
|
||||
@VisibleForTesting
|
||||
@ -134,6 +135,10 @@ public int getInterval() {
|
||||
return interval;
|
||||
}
|
||||
|
||||
public synchronized void setEstimator(QuantileEstimator quantileEstimator) {
|
||||
this.estimator = quantileEstimator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Runnable used to periodically roll over the internal
|
||||
* {@link SampleQuantiles} every interval.
|
||||
|
@ -0,0 +1,32 @@
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.metrics2.util;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
public interface QuantileEstimator {
|
||||
|
||||
void insert(long value);
|
||||
|
||||
Map<Quantile, Long> snapshot();
|
||||
|
||||
long getCount();
|
||||
|
||||
void clear();
|
||||
}
|
@ -47,7 +47,7 @@
|
||||
*
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class SampleQuantiles {
|
||||
public class SampleQuantiles implements QuantileEstimator {
|
||||
|
||||
/**
|
||||
* Total number of items in stream
|
||||
|
@ -117,6 +117,11 @@
|
||||
<groupId>com.google.protobuf</groupId>
|
||||
<artifactId>protobuf-java</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.codahale.metrics</groupId>
|
||||
<artifactId>metrics-core</artifactId>
|
||||
</dependency>
|
||||
<!--
|
||||
junit must be before mockito-all on the classpath. mockito-all bundles its
|
||||
own copy of the hamcrest classes, but they don't match our junit version.
|
||||
|
@ -18,6 +18,9 @@
|
||||
|
||||
package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
|
||||
|
||||
import com.codahale.metrics.Histogram;
|
||||
import com.codahale.metrics.Snapshot;
|
||||
import com.codahale.metrics.UniformReservoir;
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.metrics2.MetricsCollector;
|
||||
import org.apache.hadoop.metrics2.MetricsInfo;
|
||||
@ -29,13 +32,17 @@
|
||||
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
|
||||
import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
|
||||
import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
|
||||
import org.apache.hadoop.metrics2.lib.MutableQuantiles;
|
||||
import org.apache.hadoop.metrics2.lib.MutableStat;
|
||||
import org.apache.hadoop.metrics2.util.Quantile;
|
||||
import org.apache.hadoop.metrics2.util.QuantileEstimator;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Timer;
|
||||
import java.util.TimerTask;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import static org.apache.hadoop.metrics2.lib.Interns.info;
|
||||
|
||||
@ -47,10 +54,13 @@ public class ContainerMetrics implements MetricsSource {
|
||||
public static final String VMEM_LIMIT_METRIC_NAME = "vMemLimitMBs";
|
||||
public static final String VCORE_LIMIT_METRIC_NAME = "vCoreLimit";
|
||||
public static final String PMEM_USAGE_METRIC_NAME = "pMemUsageMBs";
|
||||
public static final String PMEM_USAGE_QUANTILES_NAME = "pMemUsageMBHistogram";
|
||||
public static final String LAUNCH_DURATION_METRIC_NAME = "launchDurationMs";
|
||||
public static final String LOCALIZATION_DURATION_METRIC_NAME =
|
||||
"localizationDurationMs";
|
||||
private static final String PHY_CPU_USAGE_METRIC_NAME = "pCpuUsagePercent";
|
||||
private static final String PHY_CPU_USAGE_QUANTILES_NAME =
|
||||
"pCpuUsagePercentHistogram";
|
||||
|
||||
// Use a multiplier of 1000 to avoid losing too much precision when
|
||||
// converting to integers
|
||||
@ -59,6 +69,9 @@ public class ContainerMetrics implements MetricsSource {
|
||||
@Metric
|
||||
public MutableStat pMemMBsStat;
|
||||
|
||||
@Metric
|
||||
public MutableQuantiles pMemMBQuantiles;
|
||||
|
||||
// This tracks overall CPU percentage of the machine in terms of percentage
|
||||
// of 1 core similar to top
|
||||
// Thus if you use 2 cores completely out of 4 available cores this value
|
||||
@ -66,6 +79,9 @@ public class ContainerMetrics implements MetricsSource {
|
||||
@Metric
|
||||
public MutableStat cpuCoreUsagePercent;
|
||||
|
||||
@Metric
|
||||
public MutableQuantiles cpuCoreUsagePercentQuantiles;
|
||||
|
||||
@Metric
|
||||
public MutableStat milliVcoresUsed;
|
||||
|
||||
@ -127,9 +143,23 @@ public class ContainerMetrics implements MetricsSource {
|
||||
|
||||
this.pMemMBsStat = registry.newStat(
|
||||
PMEM_USAGE_METRIC_NAME, "Physical memory stats", "Usage", "MBs", true);
|
||||
this.pMemMBQuantiles = registry
|
||||
.newQuantiles(PMEM_USAGE_QUANTILES_NAME, "Physical memory quantiles",
|
||||
"Usage", "MBs", 1);
|
||||
ContainerMetricsQuantiles memEstimator =
|
||||
new ContainerMetricsQuantiles(MutableQuantiles.quantiles);
|
||||
pMemMBQuantiles.setEstimator(memEstimator);
|
||||
|
||||
this.cpuCoreUsagePercent = registry.newStat(
|
||||
PHY_CPU_USAGE_METRIC_NAME, "Physical Cpu core percent usage stats",
|
||||
"Usage", "Percents", true);
|
||||
this.cpuCoreUsagePercentQuantiles = registry
|
||||
.newQuantiles(PHY_CPU_USAGE_QUANTILES_NAME,
|
||||
"Physical Cpu core percent usage quantiles", "Usage", "Percents",
|
||||
1);
|
||||
ContainerMetricsQuantiles cpuEstimator =
|
||||
new ContainerMetricsQuantiles(MutableQuantiles.quantiles);
|
||||
cpuCoreUsagePercentQuantiles.setEstimator(cpuEstimator);
|
||||
this.milliVcoresUsed = registry.newStat(
|
||||
VCORE_USAGE_METRIC_NAME, "1000 times Vcore usage", "Usage",
|
||||
"MilliVcores", true);
|
||||
@ -216,6 +246,7 @@ public synchronized void finished() {
|
||||
public void recordMemoryUsage(int memoryMBs) {
|
||||
if (memoryMBs >= 0) {
|
||||
this.pMemMBsStat.add(memoryMBs);
|
||||
this.pMemMBQuantiles.add(memoryMBs);
|
||||
}
|
||||
}
|
||||
|
||||
@ -223,6 +254,7 @@ public void recordCpuUsage(
|
||||
int totalPhysicalCpuPercent, int milliVcoresUsed) {
|
||||
if (totalPhysicalCpuPercent >=0) {
|
||||
this.cpuCoreUsagePercent.add(totalPhysicalCpuPercent);
|
||||
this.cpuCoreUsagePercentQuantiles.add(totalPhysicalCpuPercent);
|
||||
}
|
||||
if (milliVcoresUsed >= 0) {
|
||||
this.milliVcoresUsed.add(milliVcoresUsed);
|
||||
@ -274,4 +306,41 @@ public void run() {
|
||||
};
|
||||
unregisterContainerMetricsTimer.schedule(timerTask, unregisterDelayMs);
|
||||
}
|
||||
|
||||
public static class ContainerMetricsQuantiles implements QuantileEstimator {
|
||||
|
||||
private final Histogram histogram = new Histogram(new UniformReservoir());
|
||||
|
||||
private Quantile[] quantiles;
|
||||
|
||||
ContainerMetricsQuantiles(Quantile[] q) {
|
||||
quantiles = q;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void insert(long value) {
|
||||
histogram.update(value);
|
||||
}
|
||||
|
||||
@Override
|
||||
synchronized public long getCount() {
|
||||
return histogram.getCount();
|
||||
}
|
||||
|
||||
@Override
|
||||
synchronized public void clear() {
|
||||
// don't do anything because we want metrics over the lifetime of the
|
||||
// container
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized Map<Quantile, Long> snapshot() {
|
||||
Snapshot snapshot = histogram.getSnapshot();
|
||||
Map<Quantile, Long> values = new TreeMap<>();
|
||||
for (Quantile quantile : quantiles) {
|
||||
values.put(quantile, (long) snapshot.getValue(quantile.quantile));
|
||||
}
|
||||
return values;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -18,6 +18,7 @@
|
||||
|
||||
package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
|
||||
|
||||
import org.apache.hadoop.metrics2.AbstractMetric;
|
||||
import org.apache.hadoop.metrics2.MetricsRecord;
|
||||
import org.apache.hadoop.metrics2.MetricsSystem;
|
||||
import org.apache.hadoop.metrics2.impl.MetricsCollectorImpl;
|
||||
@ -26,10 +27,15 @@
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.mockito.Matchers.any;
|
||||
import static org.mockito.Matchers.anyString;
|
||||
@ -158,4 +164,54 @@ public void testContainerMetricsFinished() throws InterruptedException {
|
||||
system, containerId3, 1, 0));
|
||||
system.shutdown();
|
||||
}
|
||||
|
||||
/**
|
||||
* Run a test to submit values for actual memory usage and see if the
|
||||
* histogram comes out correctly.
|
||||
* @throws Exception
|
||||
*/
|
||||
@Test
|
||||
public void testContainerMetricsHistogram() throws Exception {
|
||||
|
||||
// submit 2 values - 1024 and 2048. 75th, 90th, 95th and 99th percentiles
|
||||
// will be 2048. 50th percentile will be 1536((1024+2048)/2)
|
||||
// if we keep recording 1024 and 2048 in a loop, the 50th percentile
|
||||
// will tend closer to 2048
|
||||
Map<String, Long> expectedValues = new HashMap<>();
|
||||
expectedValues.put("PMemUsageMBHistogram50thPercentileMBs", 1536L);
|
||||
expectedValues.put("PMemUsageMBHistogram75thPercentileMBs", 2048L);
|
||||
expectedValues.put("PMemUsageMBHistogram90thPercentileMBs", 2048L);
|
||||
expectedValues.put("PMemUsageMBHistogram95thPercentileMBs", 2048L);
|
||||
expectedValues.put("PMemUsageMBHistogram99thPercentileMBs", 2048L);
|
||||
expectedValues.put("PCpuUsagePercentHistogram50thPercentilePercents", 0L);
|
||||
expectedValues.put("PCpuUsagePercentHistogram75thPercentilePercents", 0L);
|
||||
expectedValues.put("PCpuUsagePercentHistogram90thPercentilePercents", 0L);
|
||||
expectedValues.put("PCpuUsagePercentHistogram95thPercentilePercents", 0L);
|
||||
expectedValues.put("PCpuUsagePercentHistogram99thPercentilePercents", 0L);
|
||||
Set<String> testResults = new HashSet<>();
|
||||
int delay = 10;
|
||||
int rolloverDelay = 1000;
|
||||
MetricsCollectorImpl collector = new MetricsCollectorImpl();
|
||||
ContainerId containerId = mock(ContainerId.class);
|
||||
ContainerMetrics metrics =
|
||||
ContainerMetrics.forContainer(containerId, delay, 0);
|
||||
|
||||
metrics.recordMemoryUsage(1024);
|
||||
metrics.recordMemoryUsage(2048);
|
||||
Thread.sleep(rolloverDelay + 10);
|
||||
metrics.getMetrics(collector, true);
|
||||
for (MetricsRecord record : collector.getRecords()) {
|
||||
for (AbstractMetric metric : record.metrics()) {
|
||||
String metricName = metric.name();
|
||||
if (expectedValues.containsKey(metricName)) {
|
||||
Long expectedValue = expectedValues.get(metricName);
|
||||
Assert.assertEquals(
|
||||
"Metric " + metricName + " doesn't have expected value",
|
||||
expectedValue, metric.value());
|
||||
testResults.add(metricName);
|
||||
}
|
||||
}
|
||||
}
|
||||
Assert.assertEquals(expectedValues.keySet(), testResults);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user