YARN-11490. Reverting YARN-11211 and eliminating the use of DefaultMetricsSystem during configuration validation (#5644)

This commit is contained in:
Tamas Domok 2023-05-23 10:36:37 +02:00 committed by GitHub
parent a98d15804a
commit aeb3f6f1a8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 205 additions and 26 deletions

View File

@ -78,7 +78,7 @@ public synchronized QueueMetrics getUserMetrics(String userName) {
metrics = new PartitionQueueMetrics(this.metricsSystem, this.queueName, metrics = new PartitionQueueMetrics(this.metricsSystem, this.queueName,
null, false, this.conf, this.partition); null, false, this.conf, this.partition);
users.put(userName, metrics); users.put(userName, metrics);
registerMetrics( metricsSystem.register(
pSourceName(partitionJMXStr).append(qSourceName(queueName)) pSourceName(partitionJMXStr).append(qSourceName(queueName))
.append(",user=").append(userName).toString(), .append(",user=").append(userName).toString(),
"Metrics for user '" + userName + "' in queue '" + queueName + "'", "Metrics for user '" + userName + "' in queue '" + queueName + "'",

View File

@ -117,6 +117,9 @@ public class QueueMetrics implements MetricsSource {
@Metric("Reserved CPU in virtual cores") MutableGaugeInt reservedVCores; @Metric("Reserved CPU in virtual cores") MutableGaugeInt reservedVCores;
@Metric("# of reserved containers") MutableGaugeInt reservedContainers; @Metric("# of reserved containers") MutableGaugeInt reservedContainers;
// INTERNAL ONLY
private static final String CONFIGURATION_VALIDATION = "yarn.configuration-validation";
private final MutableGaugeInt[] runningTime; private final MutableGaugeInt[] runningTime;
private TimeBucketMetrics<ApplicationId> runBuckets; private TimeBucketMetrics<ApplicationId> runBuckets;
@ -289,7 +292,7 @@ public synchronized QueueMetrics getUserMetrics(String userName) {
metrics = metrics =
new QueueMetrics(metricsSystem, queueName, null, false, conf); new QueueMetrics(metricsSystem, queueName, null, false, conf);
users.put(userName, metrics); users.put(userName, metrics);
registerMetrics( metricsSystem.register(
sourceName(queueName).append(",user=").append(userName).toString(), sourceName(queueName).append(",user=").append(userName).toString(),
"Metrics for user '"+ userName +"' in queue '"+ queueName +"'", "Metrics for user '"+ userName +"' in queue '"+ queueName +"'",
metrics.tag(QUEUE_INFO, queueName).tag(USER_INFO, userName)); metrics.tag(QUEUE_INFO, queueName).tag(USER_INFO, userName));
@ -334,13 +337,15 @@ public synchronized QueueMetrics getPartitionQueueMetrics(String partition) {
QueueMetrics queueMetrics = QueueMetrics queueMetrics =
new PartitionQueueMetrics(metricsSystem, this.queueName, parentQueue, new PartitionQueueMetrics(metricsSystem, this.queueName, parentQueue,
this.enableUserMetrics, this.conf, partition); this.enableUserMetrics, this.conf, partition);
registerMetrics( metricsSystem.register(
pSourceName(partitionJMXStr).append(qSourceName(this.queueName)) pSourceName(partitionJMXStr).append(qSourceName(this.queueName))
.toString(), .toString(),
"Metrics for queue: " + this.queueName, "Metrics for queue: " + this.queueName,
queueMetrics.tag(PARTITION_INFO, partitionJMXStr).tag(QUEUE_INFO, queueMetrics.tag(PARTITION_INFO, partitionJMXStr).tag(QUEUE_INFO,
this.queueName)); this.queueName));
if (!isConfigurationValidationSet(conf)) {
getQueueMetrics().put(metricName, queueMetrics); getQueueMetrics().put(metricName, queueMetrics);
}
registerPartitionMetricsCreation(metricName); registerPartitionMetricsCreation(metricName);
return queueMetrics; return queueMetrics;
} else { } else {
@ -348,6 +353,26 @@ public synchronized QueueMetrics getPartitionQueueMetrics(String partition) {
} }
} }
/**
* Check whether we are in a configuration validation mode. INTERNAL ONLY.
*
* @param conf the configuration to check
* @return true if
*/
public static boolean isConfigurationValidationSet(Configuration conf) {
return conf.getBoolean(CONFIGURATION_VALIDATION, false);
}
/**
* Set configuration validation mode. INTERNAL ONLY.
*
* @param conf the configuration to update
* @param value the value for the validation mode
*/
public static void setConfigurationValidation(Configuration conf, boolean value) {
conf.setBoolean(CONFIGURATION_VALIDATION, value);
}
/** /**
* Partition Metrics * Partition Metrics
* *
@ -378,7 +403,7 @@ private QueueMetrics getPartitionMetrics(String partition) {
// Register with the MetricsSystems // Register with the MetricsSystems
if (metricsSystem != null) { if (metricsSystem != null) {
registerMetrics(pSourceName(partitionJMXStr).toString(), metricsSystem.register(pSourceName(partitionJMXStr).toString(),
"Metrics for partition: " + partitionJMXStr, "Metrics for partition: " + partitionJMXStr,
(PartitionQueueMetrics) metrics.tag(PARTITION_INFO, (PartitionQueueMetrics) metrics.tag(PARTITION_INFO,
partitionJMXStr)); partitionJMXStr));
@ -1359,15 +1384,4 @@ public void setParentQueue(Queue parentQueue) {
} }
} }
} }
protected void registerMetrics(String sourceName, String desc, QueueMetrics metrics) {
MetricsSource source = metricsSystem.getSource(sourceName);
// Unregister metrics if a source is already present
if (source != null) {
LOG.info("Unregistering source " + sourceName);
metricsSystem.unregisterSource(sourceName);
}
metricsSystem.register(sourceName, desc, metrics);
}
} }

View File

@ -21,11 +21,13 @@
import java.util.Map; import java.util.Map;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.metrics2.MetricsSink;
import org.apache.hadoop.metrics2.MetricsSource; import org.apache.hadoop.metrics2.MetricsSource;
import org.apache.hadoop.metrics2.MetricsSystem; import org.apache.hadoop.metrics2.MetricsSystem;
import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics; import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.MetricsAnnotations;
import org.apache.hadoop.metrics2.lib.MutableGaugeFloat; import org.apache.hadoop.metrics2.lib.MutableGaugeFloat;
import org.apache.hadoop.metrics2.lib.MutableGaugeInt; import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
import org.apache.hadoop.metrics2.lib.MutableGaugeLong; import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
@ -230,9 +232,74 @@ protected void createQueueMetricsForCustomResources() {
} }
} }
@Metrics(context="dummymetricssystem")
public static class DummyMetricsSystemImpl extends MetricsSystem {
@Override
public MetricsSystem init(String prefix) {
return this;
}
@Override
public <T> T register(String name, String desc, T source) {
MetricsAnnotations.newSourceBuilder(source).build();
return source;
}
@Override
public void unregisterSource(String name) {
}
@Override
public MetricsSource getSource(String name) {
return null;
}
@Override
public <T extends MetricsSink> T register(String name, String desc, T sink) {
return null;
}
@Override
public void register(Callback callback) {
}
@Override
public void publishMetricsNow() {
}
@Override
public boolean shutdown() {
return false;
}
@Override
public void start() {
}
@Override
public void stop() {
}
@Override
public void startMetricsMBeans() {
}
@Override
public void stopMetricsMBeans() {
}
@Override
public String currentConfig() {
return null;
}
}
public synchronized static CSQueueMetrics forQueue(String queueName, public synchronized static CSQueueMetrics forQueue(String queueName,
Queue parent, boolean enableUserMetrics, Configuration conf) { Queue parent, boolean enableUserMetrics, Configuration conf) {
MetricsSystem ms = DefaultMetricsSystem.instance(); final boolean isConfigValidation = isConfigurationValidationSet(conf);
MetricsSystem ms = isConfigValidation
? new DummyMetricsSystemImpl() : DefaultMetricsSystem.instance();
QueueMetrics metrics = getQueueMetrics().get(queueName); QueueMetrics metrics = getQueueMetrics().get(queueName);
if (metrics == null) { if (metrics == null) {
metrics = metrics =
@ -241,16 +308,15 @@ public synchronized static CSQueueMetrics forQueue(String queueName,
// Register with the MetricsSystems // Register with the MetricsSystems
if (ms != null) { if (ms != null) {
MetricsSource source = ms.getSource(sourceName(queueName).toString());
if (source != null) {
ms.unregisterSource(sourceName(queueName).toString());
}
metrics = metrics =
ms.register(sourceName(queueName).toString(), "Metrics for queue: " ms.register(sourceName(queueName).toString(), "Metrics for queue: "
+ queueName, metrics); + queueName, metrics);
} }
if (!isConfigValidation) {
getQueueMetrics().put(queueName, metrics); getQueueMetrics().put(queueName, metrics);
} }
}
return (CSQueueMetrics) metrics; return (CSQueueMetrics) metrics;
} }
@ -265,7 +331,7 @@ public synchronized QueueMetrics getUserMetrics(String userName) {
metrics = metrics =
new CSQueueMetrics(metricsSystem, queueName, null, false, conf); new CSQueueMetrics(metricsSystem, queueName, null, false, conf);
users.put(userName, metrics); users.put(userName, metrics);
registerMetrics( metricsSystem.register(
sourceName(queueName).append(",user=").append(userName).toString(), sourceName(queueName).append(",user=").append(userName).toString(),
"Metrics for user '" + userName + "' in queue '" + queueName + "'", "Metrics for user '" + userName + "' in queue '" + queueName + "'",
((CSQueueMetrics) metrics.tag(QUEUE_INFO, queueName)).tag(USER_INFO, ((CSQueueMetrics) metrics.tag(QUEUE_INFO, queueName)).tag(USER_INFO,

View File

@ -41,8 +41,13 @@ private CapacitySchedulerConfigValidator() {
} }
public static boolean validateCSConfiguration( public static boolean validateCSConfiguration(
final Configuration oldConf, final Configuration newConf, final Configuration oldConfParam, final Configuration newConf,
final RMContext rmContext) throws IOException { final RMContext rmContext) throws IOException {
// ensure that the oldConf is deep copied
Configuration oldConf = new Configuration(oldConfParam);
QueueMetrics.setConfigurationValidation(oldConf, true);
QueueMetrics.setConfigurationValidation(newConf, true);
CapacityScheduler liveScheduler = (CapacityScheduler) rmContext.getScheduler(); CapacityScheduler liveScheduler = (CapacityScheduler) rmContext.getScheduler();
CapacityScheduler newCs = new CapacityScheduler(); CapacityScheduler newCs = new CapacityScheduler();
try { try {
@ -56,8 +61,6 @@ public static boolean validateCSConfiguration(
return true; return true;
} finally { } finally {
newCs.stop(); newCs.stop();
QueueMetrics.clearQueueMetrics();
liveScheduler.resetSchedulerMetrics();
} }
} }

View File

@ -19,6 +19,9 @@
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.metrics2.MetricsSource;
import org.apache.hadoop.metrics2.MetricsSystem;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap;
import org.apache.hadoop.yarn.LocalConfigurationProvider; import org.apache.hadoop.yarn.LocalConfigurationProvider;
import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes; import org.apache.hadoop.yarn.api.protocolrecords.ResourceTypes;
@ -32,6 +35,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.placement.PlacementManager; import org.apache.hadoop.yarn.server.resourcemanager.placement.PlacementManager;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.util.YarnVersionInfo; import org.apache.hadoop.yarn.util.YarnVersionInfo;
import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator; import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator;
@ -425,6 +429,98 @@ public void testValidateCSConfigAddALeafQueueValid() throws IOException {
Assert.assertTrue(isValidConfig); Assert.assertTrue(isValidConfig);
} }
@Test
public void testValidateDoesNotModifyTheDefaultMetricsSystem() throws Exception {
try {
YarnConfiguration conf = new YarnConfiguration(CapacitySchedulerConfigGeneratorForTest
.createBasicCSConfiguration());
conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
ResourceScheduler.class);
mockRM = new MockRM(conf);
cs = (CapacityScheduler) mockRM.getResourceScheduler();
mockRM.start();
cs.start();
RMContext rmContext = mockRM.getRMContext();
Configuration oldConfig = cs.getConfig();
final Map<String, QueueMetrics> cache = QueueMetrics.getQueueMetrics();
final MetricsSystem ms = DefaultMetricsSystem.instance();
QueueMetrics origQM1 = cache.get("root.test1");
QueueMetrics origQM2 = cache.get("root.test2");
Assert.assertNotNull("Original queues should be found in the cache", origQM1);
Assert.assertNotNull("Original queues should be found in the cache", origQM2);
QueueMetrics origPQM1 = cache.get("default.root.test1");
QueueMetrics origPQM2 = cache.get("default.root.test2");
Assert.assertNotNull("Original queues should be found in the cache (PartitionQueueMetrics)",
origPQM1);
Assert.assertNotNull("Original queues should be found in the cache (PartitionQueueMetrics)",
origPQM2);
MetricsSource origMS1 =
ms.getSource("QueueMetrics,q0=root,q1=test1");
MetricsSource origMS2 =
ms.getSource("QueueMetrics,q0=root,q1=test2");
Assert.assertNotNull("Original queues should be found in the Metrics System",
origMS1);
Assert.assertNotNull("Original queues should be found in the Metrics System",
origMS2);
MetricsSource origPMS1 = ms
.getSource("PartitionQueueMetrics,partition=,q0=root,q1=test1");
MetricsSource origPMS2 = ms
.getSource("PartitionQueueMetrics,partition=,q0=root,q1=test2");
Assert.assertNotNull(
"Original queues should be found in Metrics System (PartitionQueueMetrics)", origPMS1);
Assert.assertNotNull(
"Original queues should be found in Metrics System (PartitionQueueMetrics)", origPMS2);
Configuration newConfig = new Configuration(oldConfig);
newConfig
.set("yarn.scheduler.capacity.root.queues", "test1, test2, test3");
newConfig
.set("yarn.scheduler.capacity.root.test3.state", "RUNNING");
newConfig
.set("yarn.scheduler.capacity.root.test3.capacity", "30");
newConfig
.set("yarn.scheduler.capacity.root.test1.capacity", "20");
boolean isValidConfig = CapacitySchedulerConfigValidator
.validateCSConfiguration(oldConfig, newConfig, rmContext);
Assert.assertTrue(isValidConfig);
Assert.assertFalse("Validated new queue should not be in the cache",
cache.containsKey("root.test3"));
Assert.assertFalse("Validated new queue should not be in the cache (PartitionQueueMetrics)",
cache.containsKey("default.root.test3"));
Assert.assertNull("Validated new queue should not be in the Metrics System",
ms.getSource("QueueMetrics,q0=root,q1=test3"));
Assert.assertNull(
"Validated new queue should not be in Metrics System (PartitionQueueMetrics)",
ms
.getSource("PartitionQueueMetrics,partition=,q0=root,q1=test3"));
// Config validation should not change the existing
// objects in the cache and the metrics system
Assert.assertEquals(origQM1, cache.get("root.test1"));
Assert.assertEquals(origQM2, cache.get("root.test2"));
Assert.assertEquals(origPQM1, cache.get("default.root.test1"));
Assert.assertEquals(origPQM1, cache.get("default.root.test1"));
Assert.assertEquals(origMS1,
ms.getSource("QueueMetrics,q0=root,q1=test1"));
Assert.assertEquals(origMS2,
ms.getSource("QueueMetrics,q0=root,q1=test2"));
Assert.assertEquals(origPMS1,
ms.getSource("PartitionQueueMetrics,partition=,q0=root,q1=test1"));
Assert.assertEquals(origPMS2,
ms.getSource("PartitionQueueMetrics,partition=,q0=root,q1=test2"));
} finally {
mockRM.stop();
}
}
/** /**
* Delete a running queue. * Delete a running queue.
*/ */