YARN-10713. ClusterMetrics should support custom resource capacity related metrics. Contributed by Qi Zhu.
This commit is contained in:
parent
af1f9f43ea
commit
19e418c10d
@ -20,6 +20,7 @@
|
||||
|
||||
import static org.apache.hadoop.metrics2.lib.Interns.info;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
@ -35,6 +36,9 @@
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceInformation;
|
||||
import org.apache.hadoop.yarn.metrics.CustomResourceMetricValue;
|
||||
import org.apache.hadoop.yarn.metrics.CustomResourceMetrics;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetricsForCustomResources;
|
||||
import org.apache.hadoop.yarn.util.resource.ResourceUtils;
|
||||
|
||||
@InterfaceAudience.Private
|
||||
@ -58,10 +62,19 @@ public class ClusterMetrics {
|
||||
@Metric("Vcore Utilization") MutableGaugeLong utilizedVirtualCores;
|
||||
@Metric("Memory Capability") MutableGaugeLong capabilityMB;
|
||||
@Metric("Vcore Capability") MutableGaugeLong capabilityVirtualCores;
|
||||
@Metric("GPU Capability") MutableGaugeLong capabilityGPUs;
|
||||
|
||||
private static final MetricsInfo RECORD_INFO = info("ClusterMetrics",
|
||||
"Metrics for the Yarn Cluster");
|
||||
|
||||
private static final String CUSTOM_RESOURCE_CAPABILITY_METRIC_PREFIX =
|
||||
"Capability.";
|
||||
private static final String CUSTOM_RESOURCE_CAPABILITY_METRIC_DESC =
|
||||
"NAME Capability";
|
||||
|
||||
private static CustomResourceMetrics customResourceMetrics;
|
||||
|
||||
private final CustomResourceMetricValue customResourceCapability =
|
||||
new CustomResourceMetricValue();
|
||||
|
||||
private static volatile ClusterMetrics INSTANCE = null;
|
||||
private static MetricsRegistry registry;
|
||||
@ -86,6 +99,17 @@ private static void registerMetrics() {
|
||||
if (ms != null) {
|
||||
ms.register("ClusterMetrics", "Metrics for the Yarn Cluster", INSTANCE);
|
||||
}
|
||||
|
||||
if (ResourceUtils.getNumberOfKnownResourceTypes() > 2) {
|
||||
customResourceMetrics =
|
||||
new CustomResourceMetrics();
|
||||
Map<String, Long> customResources =
|
||||
customResourceMetrics.initAndGetCustomResources();
|
||||
customResourceMetrics.
|
||||
registerCustomResources(customResources,
|
||||
registry, CUSTOM_RESOURCE_CAPABILITY_METRIC_PREFIX,
|
||||
CUSTOM_RESOURCE_CAPABILITY_METRIC_DESC);
|
||||
}
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
@ -209,23 +233,20 @@ public long getCapabilityVirtualCores() {
|
||||
return capabilityVirtualCores.value();
|
||||
}
|
||||
|
||||
public long getCapabilityGPUs() {
|
||||
if (capabilityGPUs == null) {
|
||||
return 0;
|
||||
}
|
||||
public Map<String, Long> getCustomResourceCapability() {
|
||||
return customResourceCapability.getValues();
|
||||
}
|
||||
|
||||
return capabilityGPUs.value();
|
||||
public void setCustomResourceCapability(Resource res) {
|
||||
this.customResourceCapability.set(res);
|
||||
}
|
||||
|
||||
public void incrCapability(Resource res) {
|
||||
if (res != null) {
|
||||
capabilityMB.incr(res.getMemorySize());
|
||||
capabilityVirtualCores.incr(res.getVirtualCores());
|
||||
Integer gpuIndex = ResourceUtils.getResourceTypeIndex()
|
||||
.get(ResourceInformation.GPU_URI);
|
||||
if (gpuIndex != null) {
|
||||
capabilityGPUs.incr(res.
|
||||
getResourceValue(ResourceInformation.GPU_URI));
|
||||
if (customResourceCapability != null) {
|
||||
customResourceCapability.increase(res);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -234,11 +255,8 @@ public void decrCapability(Resource res) {
|
||||
if (res != null) {
|
||||
capabilityMB.decr(res.getMemorySize());
|
||||
capabilityVirtualCores.decr(res.getVirtualCores());
|
||||
Integer gpuIndex = ResourceUtils.getResourceTypeIndex()
|
||||
.get(ResourceInformation.GPU_URI);
|
||||
if (gpuIndex != null) {
|
||||
capabilityGPUs.decr(res.
|
||||
getResourceValue(ResourceInformation.GPU_URI));
|
||||
if (customResourceCapability != null) {
|
||||
customResourceCapability.decrease(res);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -231,7 +231,8 @@ public void testClusterMetricsWithGPU()
|
||||
assertEquals("Cluster Capability Vcores incorrect",
|
||||
metrics.getCapabilityVirtualCores(), 4 * 8);
|
||||
assertEquals("Cluster Capability GPUs incorrect",
|
||||
metrics.getCapabilityGPUs(), 4 * 8);
|
||||
(metrics.getCustomResourceCapability()
|
||||
.get(GPU_URI)).longValue(), 4 * 8);
|
||||
|
||||
for (RMNode rmNode : rmNodes) {
|
||||
nodeTracker.removeNode(rmNode.getNodeID());
|
||||
@ -243,7 +244,8 @@ public void testClusterMetricsWithGPU()
|
||||
assertEquals("Cluster Capability Vcores incorrect",
|
||||
metrics.getCapabilityVirtualCores(), 0);
|
||||
assertEquals("Cluster Capability GPUs incorrect",
|
||||
metrics.getCapabilityGPUs(), 0);
|
||||
(metrics.getCustomResourceCapability()
|
||||
.get(GPU_URI)).longValue(), 0);
|
||||
ClusterMetrics.destroy();
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user