YARN-6547. Enhance SLS-based tests leveraging invariant checker.

This commit is contained in:
Carlo Curino 2017-06-06 17:19:15 -07:00
parent c31cb879a3
commit b65100c14b
9 changed files with 152 additions and 11 deletions

View File

@ -27,6 +27,7 @@
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.log.metrics.EventCounter;
import org.apache.hadoop.metrics2.MetricsCollector;
@ -59,6 +60,15 @@ synchronized JvmMetrics init(String processName, String sessionId) {
}
}
/**
 * Registers this instance with the default metrics system when no source
 * named "JvmMetrics" is currently registered there.
 *
 * During tests the instance might already exist without being registered,
 * so this lets a caller re-attach it; when a source is already present the
 * call does nothing.
 */
@VisibleForTesting
public synchronized void registerIfNeeded(){
  final MetricsSystem metricsSystem = DefaultMetricsSystem.instance();
  final boolean alreadyRegistered =
      metricsSystem.getSource("JvmMetrics") != null;
  if (alreadyRegistered) {
    // A source is already present under that name: nothing to do.
    return;
  }
  metricsSystem.register(JvmMetrics.name(), JvmMetrics.description(), this);
}
static final float M = 1024*1024;
static public final float MEMORY_MAX_UNLIMITED_MB = -1;

View File

@ -135,6 +135,8 @@
<exclude>src/test/resources/syn.json</exclude>
<exclude>src/test/resources/inputsls.json</exclude>
<exclude>src/test/resources/nodes.json</exclude>
<exclude>src/test/resources/exit-invariants.txt</exclude>
<exclude>src/test/resources/ongoing-invariants.txt</exclude>
</excludes>
</configuration>
</plugin>

View File

@ -49,6 +49,7 @@
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.metrics2.source.JvmMetrics;
import org.apache.hadoop.tools.rumen.JobTraceReader;
import org.apache.hadoop.tools.rumen.LoggedJob;
import org.apache.hadoop.tools.rumen.LoggedTask;
@ -243,6 +244,13 @@ protected ApplicationMasterLauncher createAMLauncher() {
return new MockAMLauncher(se, this.rmContext, amMap);
}
};
// Across runs of parametrized tests, the JvmMetrics object is retained,
// but is not registered correctly
JvmMetrics jvmMetrics = JvmMetrics.initSingleton("ResourceManager", null);
jvmMetrics.registerIfNeeded();
// Init and start the actual ResourceManager
rm.init(rmConf);
rm.start();
}

View File

@ -21,8 +21,10 @@
import org.apache.commons.lang3.ArrayUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.resourcemanager.monitor.invariants.MetricsInvariantChecker;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameter;
@ -39,7 +41,7 @@
@RunWith(value = Parameterized.class)
@NotThreadSafe
@SuppressWarnings("VisibilityModifier")
public class BaseSLSRunnerTest {
public abstract class BaseSLSRunnerTest {
@Parameter(value = 0)
public String schedulerType;
@ -54,6 +56,11 @@ public class BaseSLSRunnerTest {
public String nodeFile;
protected SLSRunner sls;
protected String ongoingInvariantFile;
protected String exitInvariantFile;
@Before
public abstract void setup();
@After
public void tearDown() throws InterruptedException {
@ -82,22 +89,30 @@ public void uncaughtException(Thread t, Throwable e) {
switch (traceType) {
case "OLD_SLS":
args = new String[] {"-inputsls", traceLocation, "-output",
slsOutputDir.getAbsolutePath()};
slsOutputDir.getAbsolutePath() };
break;
case "OLD_RUMEN":
args = new String[] {"-inputrumen", traceLocation, "-output",
slsOutputDir.getAbsolutePath()};
slsOutputDir.getAbsolutePath() };
break;
default:
args = new String[] {"-tracetype", traceType, "-tracelocation",
traceLocation, "-output", slsOutputDir.getAbsolutePath()};
traceLocation, "-output", slsOutputDir.getAbsolutePath() };
}
if (nodeFile != null) {
args = ArrayUtils.addAll(args, new String[] {"-nodes", nodeFile});
args = ArrayUtils.addAll(args, new String[] {"-nodes", nodeFile });
}
// enable continuous invariant checks
conf.set(YarnConfiguration.RM_SCHEDULER, schedulerType);
if (ongoingInvariantFile != null) {
conf.set(YarnConfiguration.RM_SCHEDULER_MONITOR_POLICIES,
MetricsInvariantChecker.class.getCanonicalName());
conf.set(MetricsInvariantChecker.INVARIANTS_FILE, ongoingInvariantFile);
conf.setBoolean(MetricsInvariantChecker.THROW_ON_VIOLATION, true);
}
sls = new SLSRunner(conf);
sls.run(args);
@ -115,6 +130,22 @@ public void uncaughtException(Thread t, Throwable e) {
}
timeout--;
}
shutdownHookInvariantCheck();
}
/**
 * Checks exit invariants (e.g., number of apps submitted, completed, etc.).
 *
 * Does nothing when no exit invariant file has been set. Otherwise a new
 * checker is initialised from a fresh configuration that names the exit
 * invariant file and enables throw-on-violation, and its
 * {@code editSchedule()} is invoked once.
 */
private void shutdownHookInvariantCheck() {
  if (exitInvariantFile == null) {
    // No exit invariants configured for this test.
    return;
  }
  MetricsInvariantChecker exitChecker = new MetricsInvariantChecker();
  Configuration exitConf = new Configuration();
  exitConf.setBoolean(MetricsInvariantChecker.THROW_ON_VIOLATION, true);
  exitConf.set(MetricsInvariantChecker.INVARIANTS_FILE, exitInvariantFile);
  exitChecker.init(exitConf, null, null);
  exitChecker.editSchedule();
}
}

View File

@ -70,4 +70,8 @@ public void testSimulatorRunning() throws Exception {
}
@Override
public void setup() {
  // Intentionally empty: this test assigns nothing during setup.
  // NOTE(review): presumably this leaves the invariant file fields unset so
  // that the invariant checks are skipped for this test -- confirm against
  // the base class.
}
}

View File

@ -20,6 +20,9 @@
import net.jcip.annotations.NotThreadSafe;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
@ -38,12 +41,8 @@ public class TestSLSRunner extends BaseSLSRunnerTest {
@Parameters(name = "Testing with: {1}, {0}, (nodeFile {3})")
public static Collection<Object[]> data() {
String capScheduler =
"org.apache.hadoop.yarn.server.resourcemanager.scheduler."
+ "capacity.CapacityScheduler";
String fairScheduler =
"org.apache.hadoop.yarn.server.resourcemanager.scheduler."
+ "fair.FairScheduler";
String capScheduler = CapacityScheduler.class.getCanonicalName();
String fairScheduler = FairScheduler.class.getCanonicalName();
String slsTraceFile = "src/test/resources/inputsls.json";
String rumenTraceFile = "src/main/data/2jobs2min-rumen-jh.json";
String synthTraceFile = "src/test/resources/syn.json";
@ -73,6 +72,12 @@ public static Collection<Object[]> data() {
});
}
/**
 * Selects the invariant files used by this test: one for the ongoing
 * invariants and one for the exit invariants.
 */
@Before
public void setup() {
  this.exitInvariantFile = "src/test/resources/exit-invariants.txt";
  this.ongoingInvariantFile = "src/test/resources/ongoing-invariants.txt";
}
@Test(timeout = 60000)
@SuppressWarnings("all")
public void testSimulatorRunning() throws Exception {

View File

@ -0,0 +1,8 @@
ActiveApplications >= 0
AppsCompleted >= 0
AppsFailed >= 0
AppsKilled >= 0
AppsPending >= 0
AppsRunning >= 0
AppsSubmitted >= 0
PendingContainers >= 0

View File

@ -0,0 +1,19 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# log4j configuration used during build and unit tests
log4j.rootLogger=info,stdout
log4j.threshold=ALL
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2} (%F:%M(%L)) - %m%n

View File

@ -0,0 +1,54 @@
running_0 >= 0
running_60 >= 0
running_300 >= 0
running_1440 >= 0
AppsSubmitted >= 0
AppsRunning >= 0
AppsPending >= 0
AppsCompleted >= 0
AppsKilled >= 0
AppsFailed >= 0
AllocatedMB >= 0
AllocatedVCores >= 0
AllocatedContainers >= 0
AggregateContainersAllocated >= 0
AggregateNodeLocalContainersAllocated >= 0
AggregateRackLocalContainersAllocated >= 0
AggregateOffSwitchContainersAllocated >= 0
AggregateContainersReleased >= 0
AggregateContainersPreempted >= 0
AvailableMB >= 0
AvailableVCores >= 0
PendingMB >= 0
PendingVCores >= 0
PendingContainers >= 0
ReservedMB >= 0
ReservedVCores >= 0
ReservedContainers >= 0
ActiveUsers >= 0
ActiveApplications >= 0
AppAttemptFirstContainerAllocationDelayNumOps >= 0
AppAttemptFirstContainerAllocationDelayAvgTime >= 0
MemNonHeapUsedM >= 0
MemNonHeapCommittedM >= 0
MemNonHeapMaxM >= 0 || MemNonHeapMaxM == -1
MemHeapUsedM >= 0
MemHeapCommittedM >= 0
MemHeapMaxM >= 0
MemMaxM >= 0
GcCountPS_Scavenge >= 0
GcTimeMillisPS_Scavenge >= 0
GcCountPS_MarkSweep >= 0
GcTimeMillisPS_MarkSweep >= 0
GcCount >= 0
GcTimeMillis >= 0
ThreadsNew >= 0
ThreadsRunnable >= 0
ThreadsBlocked >= 0
ThreadsWaiting >= 0
ThreadsTimedWaiting >= 0
ThreadsTerminated >= 0
LogFatal >= 0
LogError >= 0
LogWarn >= 0
LogInfo >= 0