diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index d3efd1cf7b..ad368cc95e 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -111,6 +111,9 @@ Release 2.6.0 - UNRELEASED YARN-1796. container-executor shouldn't require o-r permissions (atm) + YARN-2354. DistributedShell may allocate more containers than client + specified after AM restarts. (Li Lu via jianhe) + Release 2.5.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java index 5e1cbbcd93..9051d31089 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java @@ -208,7 +208,8 @@ public static enum DSEntity { // App Master configuration // No. of containers to run shell command on - private int numTotalContainers = 1; + @VisibleForTesting + protected int numTotalContainers = 1; // Memory to request for the container on which the shell command will run private int containerMemory = 10; // VirtualCores to request for the container on which the shell command will run @@ -594,8 +595,8 @@ public void run() throws YarnException, IOException { List previousAMRunningContainers = response.getContainersFromPreviousAttempts(); - LOG.info("Received " + previousAMRunningContainers.size() - + " previous AM's running containers on AM registration."); + LOG.info(appAttemptID + " received " + previousAMRunningContainers.size() + + " previous attempts' running containers on AM registration."); numAllocatedContainers.addAndGet(previousAMRunningContainers.size()); int numTotalContainersToRequest = @@ -610,7 +611,7 @@ public void run() throws YarnException, IOException { ContainerRequest containerAsk = setupContainerAskForRM(); amRMClient.addContainerRequest(containerAsk); } - numRequestedContainers.set(numTotalContainersToRequest); + numRequestedContainers.set(numTotalContainers); try { publishApplicationAttemptEvent(timelineClient, appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_END); @@ -689,7 +690,7 @@ public void onContainersCompleted(List completedContainers) { LOG.info("Got response from RM for container ask, completedCnt=" + completedContainers.size()); for (ContainerStatus containerStatus : completedContainers) { - LOG.info("Got container status for containerID=" + LOG.info(appAttemptID + " got container status for containerID=" + containerStatus.getContainerId() + ", state=" + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus() + ", diagnostics=" diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSFailedAppMaster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSFailedAppMaster.java index db7419bc8e..f3ab4b7538 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSFailedAppMaster.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSFailedAppMaster.java @@ -36,9 +36,11 @@ public void run() throws YarnException, IOException { if (appAttemptID.getAttemptId() == 2) { // should reuse the earlier running container, so numAllocatedContainers // should be set to 1. And should ask no more containers, so - // numRequestedContainers should be set to 0. + // numRequestedContainers should be the same as numTotalContainers. + // The only container is the container requested by the AM in the first + // attempt. if (numAllocatedContainers.get() != 1 - || numRequestedContainers.get() != 0) { + || numRequestedContainers.get() != numTotalContainers) { LOG.info("NumAllocatedContainers is " + numAllocatedContainers.get() + " and NumRequestedContainers is " + numAllocatedContainers.get() + ".Application Master failed. exiting");