YARN-206. TestApplicationCleanup.testContainerCleanup occasionally fails. (jlowe via jeagles)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1407606 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jonathan Turner Eagles 2012-11-09 19:52:28 +00:00
parent f8c486fbc8
commit 9e3af0ccd2
2 changed files with 32 additions and 29 deletions

View File

@ -213,6 +213,9 @@ Release 0.23.5 - UNRELEASED
YARN-201. Fix CapacityScheduler to be less conservative for starved
off-switch requests. (jlowe via acmurthy)
YARN-206. TestApplicationCleanup.testContainerCleanup occasionally fails.
(jlowe via jeagles)
Release 0.23.4 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -81,38 +81,38 @@ public void testAppCleanup() throws Exception {
new ArrayList<ContainerId>()).getAllocatedContainers();
int contReceived = conts.size();
int waitCount = 0;
while (contReceived < request && waitCount++ < 20) {
while (contReceived < request && waitCount++ < 200) {
LOG.info("Got " + contReceived + " containers. Waiting to get "
+ request);
Thread.sleep(100);
conts = am.allocate(new ArrayList<ResourceRequest>(),
new ArrayList<ContainerId>()).getAllocatedContainers();
contReceived += conts.size();
LOG.info("Got " + contReceived + " containers. Waiting to get "
+ request);
Thread.sleep(2000);
}
Assert.assertEquals(request, conts.size());
Assert.assertEquals(request, contReceived);
am.unregisterAppAttempt();
HeartbeatResponse resp = nm1.nodeHeartbeat(attempt.getAppAttemptId(), 1,
ContainerState.COMPLETE);
am.waitForState(RMAppAttemptState.FINISHED);
int cleanedConts = 0;
int cleanedApps = 0;
List<ContainerId> contsToClean = null;
List<ApplicationId> apps = null;
//currently only containers are cleaned via this
//AM container is cleaned via container launcher
resp = nm1.nodeHeartbeat(true);
List<ContainerId> contsToClean = resp.getContainersToCleanupList();
List<ApplicationId> apps = resp.getApplicationsToCleanupList();
int cleanedConts = contsToClean.size();
int cleanedApps = apps.size();
waitCount = 0;
while ((cleanedConts < 2 || cleanedApps < 1) && waitCount++ < 20) {
contsToClean = resp.getContainersToCleanupList();
apps = resp.getApplicationsToCleanupList();
while ((cleanedConts < 2 || cleanedApps < 1) && waitCount++ < 200) {
LOG.info("Waiting to get cleanup events.. cleanedConts: "
+ cleanedConts + " cleanedApps: " + cleanedApps);
Thread.sleep(100);
resp = nm1.nodeHeartbeat(true);
contsToClean = resp.getContainersToCleanupList();
apps = resp.getApplicationsToCleanupList();
cleanedConts += contsToClean.size();
cleanedApps += apps.size();
Thread.sleep(1000);
resp = nm1.nodeHeartbeat(true);
}
Assert.assertEquals(1, apps.size());
@ -170,20 +170,20 @@ protected Dispatcher createDispatcher() {
new ArrayList<ContainerId>()).getAllocatedContainers();
int contReceived = conts.size();
int waitCount = 0;
while (contReceived < request && waitCount++ < 20) {
while (contReceived < request && waitCount++ < 200) {
LOG.info("Got " + contReceived + " containers. Waiting to get "
+ request);
Thread.sleep(100);
conts = am.allocate(new ArrayList<ResourceRequest>(),
new ArrayList<ContainerId>()).getAllocatedContainers();
dispatcher.await();
contReceived += conts.size();
LOG.info("Got " + contReceived + " containers. Waiting to get "
+ request);
Thread.sleep(2000);
}
Assert.assertEquals(request, conts.size());
Assert.assertEquals(request, contReceived);
// Release a container.
ArrayList<ContainerId> release = new ArrayList<ContainerId>();
release.add(conts.get(1).getId());
release.add(conts.get(0).getId());
am.allocate(new ArrayList<ResourceRequest>(), release);
dispatcher.await();
@ -194,7 +194,7 @@ protected Dispatcher createDispatcher() {
new HashMap<ApplicationId, List<ContainerStatus>>();
ArrayList<ContainerStatus> containerStatusList =
new ArrayList<ContainerStatus>();
containerStatusList.add(BuilderUtils.newContainerStatus(conts.get(1)
containerStatusList.add(BuilderUtils.newContainerStatus(conts.get(0)
.getId(), ContainerState.RUNNING, "nothing", 0));
containerStatuses.put(app.getApplicationId(), containerStatusList);
@ -203,13 +203,13 @@ protected Dispatcher createDispatcher() {
List<ContainerId> contsToClean = resp.getContainersToCleanupList();
int cleanedConts = contsToClean.size();
waitCount = 0;
while (cleanedConts < 1 && waitCount++ < 20) {
while (cleanedConts < 1 && waitCount++ < 200) {
LOG.info("Waiting to get cleanup events.. cleanedConts: " + cleanedConts);
Thread.sleep(100);
resp = nm1.nodeHeartbeat(true);
dispatcher.await();
contsToClean = resp.getContainersToCleanupList();
LOG.info("Waiting to get cleanup events.. cleanedConts: " + cleanedConts);
cleanedConts += contsToClean.size();
Thread.sleep(1000);
}
LOG.info("Got cleanup for " + contsToClean.get(0));
Assert.assertEquals(1, cleanedConts);
@ -220,7 +220,7 @@ protected Dispatcher createDispatcher() {
+ "NM getting cleanup");
containerStatuses.clear();
containerStatusList.clear();
containerStatusList.add(BuilderUtils.newContainerStatus(conts.get(1)
containerStatusList.add(BuilderUtils.newContainerStatus(conts.get(0)
.getId(), ContainerState.RUNNING, "nothing", 0));
containerStatuses.put(app.getApplicationId(), containerStatusList);
@ -231,13 +231,13 @@ protected Dispatcher createDispatcher() {
// The cleanup list won't be instantaneous as it is given out by scheduler
// and not RMNodeImpl.
waitCount = 0;
while (cleanedConts < 1 && waitCount++ < 20) {
while (cleanedConts < 1 && waitCount++ < 200) {
LOG.info("Waiting to get cleanup events.. cleanedConts: " + cleanedConts);
Thread.sleep(100);
resp = nm1.nodeHeartbeat(true);
dispatcher.await();
contsToClean = resp.getContainersToCleanupList();
LOG.info("Waiting to get cleanup events.. cleanedConts: " + cleanedConts);
cleanedConts += contsToClean.size();
Thread.sleep(1000);
}
LOG.info("Got cleanup for " + contsToClean.get(0));
Assert.assertEquals(1, cleanedConts);