diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetrics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetrics.java index caba170507..c6369cdbdd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetrics.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetrics.java @@ -27,6 +27,7 @@ import java.util.List; import java.util.concurrent.ConcurrentHashMap; +import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.log.metrics.EventCounter; import org.apache.hadoop.metrics2.MetricsCollector; @@ -59,6 +60,20 @@ synchronized JvmMetrics init(String processName, String sessionId) { } } + /** + * Registers this source with the default metrics system, unless the + * metrics system already returns a source for that name. + */ + @VisibleForTesting + public synchronized void registerIfNeeded() { + // During tests the instance might exist without being registered. + // Look it up under the same name it is registered with below. + MetricsSystem ms = DefaultMetricsSystem.instance(); + if (ms.getSource(JvmMetrics.name()) == null) { + ms.register(JvmMetrics.name(), JvmMetrics.description(), this); + } + } + static final float M = 1024*1024; static public final float MEMORY_MAX_UNLIMITED_MB = -1; diff --git a/hadoop-tools/hadoop-sls/pom.xml b/hadoop-tools/hadoop-sls/pom.xml index d70021c772..408947371e 100644 --- a/hadoop-tools/hadoop-sls/pom.xml +++ b/hadoop-tools/hadoop-sls/pom.xml @@ -135,6 +135,8 @@ src/test/resources/syn.json src/test/resources/inputsls.json src/test/resources/nodes.json + src/test/resources/exit-invariants.txt + src/test/resources/ongoing-invariants.txt diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java index e679c9de20..a77d40162c 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java +++ 
b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java @@ -49,6 +49,7 @@ import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.TaskType; +import org.apache.hadoop.metrics2.source.JvmMetrics; import org.apache.hadoop.tools.rumen.JobTraceReader; import org.apache.hadoop.tools.rumen.LoggedJob; import org.apache.hadoop.tools.rumen.LoggedTask; @@ -243,6 +244,13 @@ protected ApplicationMasterLauncher createAMLauncher() { return new MockAMLauncher(se, this.rmContext, amMap); } }; + + // Across runs of parametrized tests, the JvmMetrics object is retained, + // but is not registered correctly + JvmMetrics jvmMetrics = JvmMetrics.initSingleton("ResourceManager", null); + jvmMetrics.registerIfNeeded(); + + // Init and start the actual ResourceManager rm.init(rmConf); rm.start(); } diff --git a/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/BaseSLSRunnerTest.java b/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/BaseSLSRunnerTest.java index 8ef72abcdc..6b369f2a6f 100644 --- a/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/BaseSLSRunnerTest.java +++ b/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/BaseSLSRunnerTest.java @@ -21,8 +21,10 @@ import org.apache.commons.lang3.ArrayUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.resourcemanager.monitor.invariants.MetricsInvariantChecker; import org.junit.After; import org.junit.Assert; +import org.junit.Before; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameter; @@ -39,7 +41,7 @@ @RunWith(value = Parameterized.class) @NotThreadSafe @SuppressWarnings("VisibilityModifier") -public class BaseSLSRunnerTest { +public abstract class BaseSLSRunnerTest { @Parameter(value = 0) public String schedulerType; @@ -54,6 +56,11 @@ 
public class BaseSLSRunnerTest { public String nodeFile; protected SLSRunner sls; + protected String ongoingInvariantFile; + protected String exitInvariantFile; + + @Before + public abstract void setup(); @After public void tearDown() throws InterruptedException { @@ -82,22 +89,30 @@ public void uncaughtException(Thread t, Throwable e) { switch (traceType) { case "OLD_SLS": args = new String[] {"-inputsls", traceLocation, "-output", - slsOutputDir.getAbsolutePath()}; + slsOutputDir.getAbsolutePath() }; break; case "OLD_RUMEN": args = new String[] {"-inputrumen", traceLocation, "-output", - slsOutputDir.getAbsolutePath()}; + slsOutputDir.getAbsolutePath() }; break; default: args = new String[] {"-tracetype", traceType, "-tracelocation", - traceLocation, "-output", slsOutputDir.getAbsolutePath()}; + traceLocation, "-output", slsOutputDir.getAbsolutePath() }; } if (nodeFile != null) { - args = ArrayUtils.addAll(args, new String[] {"-nodes", nodeFile}); + args = ArrayUtils.addAll(args, new String[] {"-nodes", nodeFile }); } + // pick the scheduler; enable continuous invariant checks if configured conf.set(YarnConfiguration.RM_SCHEDULER, schedulerType); + if (ongoingInvariantFile != null) { + conf.set(YarnConfiguration.RM_SCHEDULER_MONITOR_POLICIES, + MetricsInvariantChecker.class.getCanonicalName()); + conf.set(MetricsInvariantChecker.INVARIANTS_FILE, ongoingInvariantFile); + conf.setBoolean(MetricsInvariantChecker.THROW_ON_VIOLATION, true); + } + sls = new SLSRunner(conf); sls.run(args); @@ -115,6 +130,22 @@ public void uncaughtException(Thread t, Throwable e) { } timeout--; } + shutdownHookInvariantCheck(); + } + + /** + * Checks exit invariants (e.g., number of apps submitted, completed, etc.). 
+ */ + private void shutdownHookInvariantCheck() { + // skipped when no exit invariant file has been set + if (exitInvariantFile != null) { + MetricsInvariantChecker ic = new MetricsInvariantChecker(); + Configuration conf = new Configuration(); + conf.set(MetricsInvariantChecker.INVARIANTS_FILE, exitInvariantFile); + conf.setBoolean(MetricsInvariantChecker.THROW_ON_VIOLATION, true); + ic.init(conf, null, null); + ic.editSchedule(); + } } } diff --git a/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestReservationSystemInvariants.java b/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestReservationSystemInvariants.java index b3a79cbc43..22e1e2e729 100644 --- a/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestReservationSystemInvariants.java +++ b/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestReservationSystemInvariants.java @@ -70,4 +70,8 @@ public void testSimulatorRunning() throws Exception { } + @Override + public void setup() { + // intentionally empty: no invariant files are set here + } } diff --git a/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestSLSRunner.java b/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestSLSRunner.java index b2bc8d51bb..567f0d9d3b 100644 --- a/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestSLSRunner.java +++ b/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestSLSRunner.java @@ -20,6 +20,9 @@ import net.jcip.annotations.NotThreadSafe; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler; +import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -38,12 +41,8 @@ public class TestSLSRunner extends BaseSLSRunnerTest { @Parameters(name = "Testing with: {1}, {0}, (nodeFile {3})") public static Collection data() { - String capScheduler = - 
"org.apache.hadoop.yarn.server.resourcemanager.scheduler." - + "capacity.CapacityScheduler"; - String fairScheduler = - "org.apache.hadoop.yarn.server.resourcemanager.scheduler." - + "fair.FairScheduler"; + String capScheduler = CapacityScheduler.class.getCanonicalName(); + String fairScheduler = FairScheduler.class.getCanonicalName(); String slsTraceFile = "src/test/resources/inputsls.json"; String rumenTraceFile = "src/main/data/2jobs2min-rumen-jh.json"; String synthTraceFile = "src/test/resources/syn.json"; @@ -73,6 +72,17 @@ public static Collection data() { }); } + /** + * Selects the invariant files that the base class checks while the + * simulation runs and after it exits. + */ + @Before + @Override + public void setup() { + ongoingInvariantFile = "src/test/resources/ongoing-invariants.txt"; + exitInvariantFile = "src/test/resources/exit-invariants.txt"; + } + @Test(timeout = 60000) @SuppressWarnings("all") public void testSimulatorRunning() throws Exception { diff --git a/hadoop-tools/hadoop-sls/src/test/resources/exit-invariants.txt b/hadoop-tools/hadoop-sls/src/test/resources/exit-invariants.txt new file mode 100644 index 0000000000..b4a3228be5 --- /dev/null +++ b/hadoop-tools/hadoop-sls/src/test/resources/exit-invariants.txt @@ -0,0 +1,8 @@ +ActiveApplications >= 0 +AppsCompleted >= 0 +AppsFailed >= 0 +AppsKilled >= 0 +AppsPending >= 0 +AppsRunning >= 0 +AppsSubmitted >= 0 +PendingContainers >= 0 diff --git a/hadoop-tools/hadoop-sls/src/test/resources/log4j.properties b/hadoop-tools/hadoop-sls/src/test/resources/log4j.properties new file mode 100644 index 0000000000..81a3f6ad5d --- /dev/null +++ b/hadoop-tools/hadoop-sls/src/test/resources/log4j.properties @@ -0,0 +1,19 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# log4j configuration used during build and unit tests + +log4j.rootLogger=info,stdout +log4j.threshold=ALL +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2} (%F:%M(%L)) - %m%n diff --git a/hadoop-tools/hadoop-sls/src/test/resources/ongoing-invariants.txt b/hadoop-tools/hadoop-sls/src/test/resources/ongoing-invariants.txt new file mode 100644 index 0000000000..363ed0d996 --- /dev/null +++ b/hadoop-tools/hadoop-sls/src/test/resources/ongoing-invariants.txt @@ -0,0 +1,54 @@ +running_0 >= 0 +running_60 >= 0 +running_300 >= 0 +running_1440 >= 0 +AppsSubmitted >= 0 +AppsRunning >= 0 +AppsPending >= 0 +AppsCompleted >= 0 +AppsKilled >= 0 +AppsFailed >= 0 +AllocatedMB >= 0 +AllocatedVCores >= 0 +AllocatedContainers >= 0 +AggregateContainersAllocated >= 0 +AggregateNodeLocalContainersAllocated >= 0 +AggregateRackLocalContainersAllocated >= 0 +AggregateOffSwitchContainersAllocated >= 0 +AggregateContainersReleased >= 0 +AggregateContainersPreempted >= 0 +AvailableMB >= 0 +AvailableVCores >= 0 +PendingMB >= 0 +PendingVCores >= 0 +PendingContainers >= 0 +ReservedMB >= 0 +ReservedVCores >= 0 +ReservedContainers >= 0 +ActiveUsers >= 0 +ActiveApplications >= 0 +AppAttemptFirstContainerAllocationDelayNumOps >= 0 +AppAttemptFirstContainerAllocationDelayAvgTime >= 0 +MemNonHeapUsedM >= 0 +MemNonHeapCommittedM >= 0 +MemNonHeapMaxM >= 0 || MemNonHeapMaxM == -1 +MemHeapUsedM >= 0 +MemHeapCommittedM >= 0 +MemHeapMaxM >= 0 +MemMaxM >= 
0 +GcCountPS_Scavenge >= 0 +GcTimeMillisPS_Scavenge >= 0 +GcCountPS_MarkSweep >= 0 +GcTimeMillisPS_MarkSweep >= 0 +GcCount >= 0 +GcTimeMillis >= 0 +ThreadsNew >= 0 +ThreadsRunnable >= 0 +ThreadsBlocked >= 0 +ThreadsWaiting >= 0 +ThreadsTimedWaiting >= 0 +ThreadsTerminated >= 0 +LogFatal >= 0 +LogError >= 0 +LogWarn >= 0 +LogInfo >= 0