YARN-2354. DistributedShell may allocate more containers than client specified after AM restarts. Contributed by Li Lu.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1614538 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c0b49ff107
commit
7e54b1c6d9
@ -111,6 +111,9 @@ Release 2.6.0 - UNRELEASED
|
||||
|
||||
YARN-1796. container-executor shouldn't require o-r permissions (atm)
|
||||
|
||||
YARN-2354. DistributedShell may allocate more containers than client
|
||||
specified after AM restarts. (Li Lu via jianhe)
|
||||
|
||||
Release 2.5.0 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
@ -208,7 +208,8 @@ public class ApplicationMaster {
|
||||
|
||||
// App Master configuration
|
||||
// No. of containers to run shell command on
|
||||
private int numTotalContainers = 1;
|
||||
@VisibleForTesting
|
||||
protected int numTotalContainers = 1;
|
||||
// Memory to request for the container on which the shell command will run
|
||||
private int containerMemory = 10;
|
||||
// VirtualCores to request for the container on which the shell command will run
|
||||
@ -594,8 +595,8 @@ public class ApplicationMaster {
|
||||
|
||||
List<Container> previousAMRunningContainers =
|
||||
response.getContainersFromPreviousAttempts();
|
||||
LOG.info("Received " + previousAMRunningContainers.size()
|
||||
+ " previous AM's running containers on AM registration.");
|
||||
LOG.info(appAttemptID + " received " + previousAMRunningContainers.size()
|
||||
+ " previous attempts' running containers on AM registration.");
|
||||
numAllocatedContainers.addAndGet(previousAMRunningContainers.size());
|
||||
|
||||
int numTotalContainersToRequest =
|
||||
@ -610,7 +611,7 @@ public class ApplicationMaster {
|
||||
ContainerRequest containerAsk = setupContainerAskForRM();
|
||||
amRMClient.addContainerRequest(containerAsk);
|
||||
}
|
||||
numRequestedContainers.set(numTotalContainersToRequest);
|
||||
numRequestedContainers.set(numTotalContainers);
|
||||
try {
|
||||
publishApplicationAttemptEvent(timelineClient, appAttemptID.toString(),
|
||||
DSEvent.DS_APP_ATTEMPT_END);
|
||||
@ -689,7 +690,7 @@ public class ApplicationMaster {
|
||||
LOG.info("Got response from RM for container ask, completedCnt="
|
||||
+ completedContainers.size());
|
||||
for (ContainerStatus containerStatus : completedContainers) {
|
||||
LOG.info("Got container status for containerID="
|
||||
LOG.info(appAttemptID + " got container status for containerID="
|
||||
+ containerStatus.getContainerId() + ", state="
|
||||
+ containerStatus.getState() + ", exitStatus="
|
||||
+ containerStatus.getExitStatus() + ", diagnostics="
|
||||
|
@ -36,9 +36,11 @@ public class TestDSFailedAppMaster extends ApplicationMaster {
|
||||
if (appAttemptID.getAttemptId() == 2) {
|
||||
// should reuse the earlier running container, so numAllocatedContainers
|
||||
// should be set to 1. And should ask no more containers, so
|
||||
// numRequestedContainers should be set to 0.
|
||||
// numRequestedContainers should be the same as numTotalContainers.
|
||||
// The only container is the container requested by the AM in the first
|
||||
// attempt.
|
||||
if (numAllocatedContainers.get() != 1
|
||||
|| numRequestedContainers.get() != 0) {
|
||||
|| numRequestedContainers.get() != numTotalContainers) {
|
||||
LOG.info("NumAllocatedContainers is " + numAllocatedContainers.get()
|
||||
+ " and NumRequestedContainers is " + numAllocatedContainers.get()
|
||||
+ ".Application Master failed. exiting");
|
||||
|
Loading…
x
Reference in New Issue
Block a user