YARN-7388. TestAMRestart should be scheduler agnostic.

This commit is contained in:
Haibo Chen 2017-11-09 10:49:50 -08:00
parent 6c32ddad30
commit a1382a18df
6 changed files with 59 additions and 24 deletions

View File

@ -1361,6 +1361,14 @@ public long getMaximumApplicationLifetime(String queueName) {
return -1;
}
/**
* Kill a RMContainer. This is meant to be called in tests only to simulate
* AM container failures.
* @param container the container to kill
*/
@VisibleForTesting
public abstract void killContainer(RMContainer container);
/**
* Update internal state of the scheduler. This can be useful for scheduler
* implementations that maintain some state that needs to be periodically

View File

@ -101,6 +101,19 @@ public static ContainerStatus createAbnormalContainerStatus(
ContainerExitStatus.ABORTED, diagnostics);
}
/**
* Utility to create a {@link ContainerStatus} for killed containers.
* @param containerId {@link ContainerId} of the killed container.
* @param diagnostics diagnostic message
* @return <code>ContainerStatus</code> for a killed container
*/
public static ContainerStatus createKilledContainerStatus(
ContainerId containerId, String diagnostics) {
return createAbnormalContainerStatus(containerId,
ContainerExitStatus.KILLED_BY_RESOURCEMANAGER, diagnostics);
}
/**
* Utility to create a {@link ContainerStatus} during exceptional
* circumstances.

View File

@ -1759,6 +1759,12 @@ public void markContainerForPreemption(ApplicationAttemptId aid,
}
}
@VisibleForTesting
@Override
public void killContainer(RMContainer container) {
markContainerForKillable(container);
}
public void markContainerForKillable(
RMContainer killableContainer) {
try {

View File

@ -786,6 +786,16 @@ public Resource getNormalizedResource(Resource requestedResource) {
incrAllocation);
}
@VisibleForTesting
@Override
public void killContainer(RMContainer container) {
ContainerStatus status = SchedulerUtils.createKilledContainerStatus(
container.getContainerId(),
"Killed by RM to simulate an AM container failure");
LOG.info("Killing container " + container);
completedContainer(container, status, RMContainerEventType.KILL);
}
@Override
public Allocation allocate(ApplicationAttemptId appAttemptId,
List<ResourceRequest> ask, List<ContainerId> release,

View File

@ -975,6 +975,16 @@ protected synchronized void nodeUpdate(RMNode nm) {
updateAvailableResourcesMetrics();
}
@VisibleForTesting
@Override
public void killContainer(RMContainer container) {
ContainerStatus status = SchedulerUtils.createKilledContainerStatus(
container.getContainerId(),
"Killed by RM to simulate an AM container failure");
LOG.info("Killing container " + container);
completedContainer(container, status, RMContainerEventType.KILL);
}
@Override
public synchronized void recoverContainersOnNode(
List<NMContainerStatus> containerReports, RMNode nm) {

View File

@ -51,10 +51,8 @@
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.TestSchedulerUtils;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
import org.apache.hadoop.yarn.util.ControlledClock;
import org.apache.hadoop.yarn.util.Records;
import org.junit.Assert;
@ -376,8 +374,6 @@ public void testNMTokensRebindOnAMRestart() throws Exception {
@Test(timeout = 100000)
public void testShouldNotCountFailureToMaxAttemptRetry() throws Exception {
YarnConfiguration conf = new YarnConfiguration();
conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
ResourceScheduler.class);
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
@ -389,12 +385,12 @@ public void testShouldNotCountFailureToMaxAttemptRetry() throws Exception {
RMApp app1 = rm1.submitApp(200);
RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
CapacityScheduler scheduler =
(CapacityScheduler) rm1.getResourceScheduler();
AbstractYarnScheduler scheduler =
(AbstractYarnScheduler) rm1.getResourceScheduler();
ContainerId amContainer =
ContainerId.newContainerId(am1.getApplicationAttemptId(), 1);
// Preempt the next attempt;
scheduler.markContainerForKillable(scheduler.getRMContainer(amContainer));
scheduler.killContainer(scheduler.getRMContainer(amContainer));
rm1.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED);
TestSchedulerUtils.waitSchedulerApplicationAttemptStopped(scheduler,
@ -414,7 +410,7 @@ public void testShouldNotCountFailureToMaxAttemptRetry() throws Exception {
// Preempt the second attempt.
ContainerId amContainer2 =
ContainerId.newContainerId(am2.getApplicationAttemptId(), 1);
scheduler.markContainerForKillable(scheduler.getRMContainer(amContainer2));
scheduler.killContainer(scheduler.getRMContainer(amContainer2));
rm1.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.FAILED);
TestSchedulerUtils.waitSchedulerApplicationAttemptStopped(scheduler,
@ -503,8 +499,6 @@ public void testShouldNotCountFailureToMaxAttemptRetry() throws Exception {
@Test(timeout = 100000)
public void testMaxAttemptOneMeansOne() throws Exception {
YarnConfiguration conf = new YarnConfiguration();
conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
ResourceScheduler.class);
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
@ -516,12 +510,12 @@ public void testMaxAttemptOneMeansOne() throws Exception {
RMApp app1 = rm1.submitApp(200);
RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
CapacityScheduler scheduler =
(CapacityScheduler) rm1.getResourceScheduler();
AbstractYarnScheduler scheduler =
(AbstractYarnScheduler) rm1.getResourceScheduler();
ContainerId amContainer =
ContainerId.newContainerId(am1.getApplicationAttemptId(), 1);
// Preempt the attempt;
scheduler.markContainerForKillable(scheduler.getRMContainer(amContainer));
scheduler.killContainer(scheduler.getRMContainer(amContainer));
rm1.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED);
TestSchedulerUtils.waitSchedulerApplicationAttemptStopped(scheduler,
@ -539,8 +533,6 @@ public void testMaxAttemptOneMeansOne() throws Exception {
@Test(timeout = 60000)
public void testPreemptedAMRestartOnRMRestart() throws Exception {
YarnConfiguration conf = new YarnConfiguration();
conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
ResourceScheduler.class);
conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, false);
@ -556,8 +548,8 @@ public void testPreemptedAMRestartOnRMRestart() throws Exception {
RMApp app1 = rm1.submitApp(200);
RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
CapacityScheduler scheduler =
(CapacityScheduler) rm1.getResourceScheduler();
AbstractYarnScheduler scheduler =
(AbstractYarnScheduler) rm1.getResourceScheduler();
ContainerId amContainer =
ContainerId.newContainerId(am1.getApplicationAttemptId(), 1);
@ -577,7 +569,7 @@ public void testPreemptedAMRestartOnRMRestart() throws Exception {
// Forcibly preempt the am container;
amContainer = ContainerId.newContainerId(am2.getApplicationAttemptId(), 1);
scheduler.markContainerForKillable(scheduler.getRMContainer(amContainer));
scheduler.killContainer(scheduler.getRMContainer(amContainer));
rm1.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.FAILED);
Assert.assertFalse(attempt2.shouldCountTowardsMaxAttemptRetry());
@ -619,8 +611,6 @@ public void testPreemptedAMRestartOnRMRestart() throws Exception {
public void testRMRestartOrFailoverNotCountedForAMFailures()
throws Exception {
YarnConfiguration conf = new YarnConfiguration();
conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
ResourceScheduler.class);
conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, false);
@ -631,8 +621,8 @@ public void testRMRestartOrFailoverNotCountedForAMFailures()
MockRM rm1 = new MockRM(conf);
MemoryRMStateStore memStore = (MemoryRMStateStore) rm1.getRMStateStore();
rm1.start();
CapacityScheduler scheduler =
(CapacityScheduler) rm1.getResourceScheduler();
AbstractYarnScheduler scheduler =
(AbstractYarnScheduler) rm1.getResourceScheduler();
MockNM nm1 =
new MockNM("127.0.0.1:1234", 8000, rm1.getResourceTrackerService());
nm1.registerNode();
@ -694,8 +684,6 @@ public void testRMRestartOrFailoverNotCountedForAMFailures()
@Test (timeout = 120000)
public void testRMAppAttemptFailuresValidityInterval() throws Exception {
YarnConfiguration conf = new YarnConfiguration();
conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
ResourceScheduler.class);
conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, false);