MAPREDUCE-5062. Fix MR AM to read max-retries from the RM. Contributed by *Zhijie Shen.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1460923 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Vinod Kumar Vavilapalli 2013-03-25 22:33:43 +00:00
parent 5e325d4562
commit 46315a2d91
8 changed files with 133 additions and 43 deletions

View File

@ -253,6 +253,9 @@ Release 2.0.5-beta - UNRELEASED
MAPREDUCE-5083. MiniMRCluster should use a random component when creating an MAPREDUCE-5083. MiniMRCluster should use a random component when creating an
actual cluster (Siddharth Seth via hitesh) actual cluster (Siddharth Seth via hitesh)
MAPREDUCE-5062. Fix MR AM to read max-retries from the RM. (Zhijie Shen via
vinodkv)
Release 2.0.4-alpha - UNRELEASED Release 2.0.4-alpha - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -161,6 +161,7 @@ public class MRAppMaster extends CompositeService {
private final int nmPort; private final int nmPort;
private final int nmHttpPort; private final int nmHttpPort;
protected final MRAppMetrics metrics; protected final MRAppMetrics metrics;
private final int maxAppAttempts;
private Map<TaskId, TaskInfo> completedTasksFromPreviousRun; private Map<TaskId, TaskInfo> completedTasksFromPreviousRun;
private List<AMInfo> amInfos; private List<AMInfo> amInfos;
private AppContext context; private AppContext context;
@ -194,14 +195,14 @@ public class MRAppMaster extends CompositeService {
public MRAppMaster(ApplicationAttemptId applicationAttemptId, public MRAppMaster(ApplicationAttemptId applicationAttemptId,
ContainerId containerId, String nmHost, int nmPort, int nmHttpPort, ContainerId containerId, String nmHost, int nmPort, int nmHttpPort,
long appSubmitTime) { long appSubmitTime, int maxAppAttempts) {
this(applicationAttemptId, containerId, nmHost, nmPort, nmHttpPort, this(applicationAttemptId, containerId, nmHost, nmPort, nmHttpPort,
new SystemClock(), appSubmitTime); new SystemClock(), appSubmitTime, maxAppAttempts);
} }
public MRAppMaster(ApplicationAttemptId applicationAttemptId, public MRAppMaster(ApplicationAttemptId applicationAttemptId,
ContainerId containerId, String nmHost, int nmPort, int nmHttpPort, ContainerId containerId, String nmHost, int nmPort, int nmHttpPort,
Clock clock, long appSubmitTime) { Clock clock, long appSubmitTime, int maxAppAttempts) {
super(MRAppMaster.class.getName()); super(MRAppMaster.class.getName());
this.clock = clock; this.clock = clock;
this.startTime = clock.getTime(); this.startTime = clock.getTime();
@ -212,6 +213,7 @@ public MRAppMaster(ApplicationAttemptId applicationAttemptId,
this.nmPort = nmPort; this.nmPort = nmPort;
this.nmHttpPort = nmHttpPort; this.nmHttpPort = nmHttpPort;
this.metrics = MRAppMetrics.create(); this.metrics = MRAppMetrics.create();
this.maxAppAttempts = maxAppAttempts;
LOG.info("Created MRAppMaster for application " + applicationAttemptId); LOG.info("Created MRAppMaster for application " + applicationAttemptId);
} }
@ -221,17 +223,12 @@ public void init(final Configuration conf) {
downloadTokensAndSetupUGI(conf); downloadTokensAndSetupUGI(conf);
//TODO this is a hack, we really need the RM to inform us when we isLastAMRetry = appAttemptID.getAttemptId() >= maxAppAttempts;
// are the last one. This would allow us to configure retries on LOG.info("The specific max attempts: " + maxAppAttempts +
// a per application basis. " for application: " + appAttemptID.getApplicationId().getId() +
int numAMRetries = conf.getInt(YarnConfiguration.RM_AM_MAX_RETRIES, ". Attempt num: " + appAttemptID.getAttemptId() +
YarnConfiguration.DEFAULT_RM_AM_MAX_RETRIES);
isLastAMRetry = appAttemptID.getAttemptId() >= numAMRetries;
LOG.info("AM Retries: " + numAMRetries +
" attempt num: " + appAttemptID.getAttemptId() +
" is last retry: " + isLastAMRetry); " is last retry: " + isLastAMRetry);
context = new RunningAppContext(conf); context = new RunningAppContext(conf);
// Job name is the same as the app name util we support DAG of jobs // Job name is the same as the app name util we support DAG of jobs
@ -266,6 +263,9 @@ public void init(final Configuration conf) {
boolean commitFailure = fs.exists(endCommitFailureFile); boolean commitFailure = fs.exists(endCommitFailureFile);
if(!stagingExists) { if(!stagingExists) {
isLastAMRetry = true; isLastAMRetry = true;
LOG.info("Attempt num: " + appAttemptID.getAttemptId() +
" is last retry: " + isLastAMRetry +
" because the staging dir doesn't exist.");
errorHappenedShutDown = true; errorHappenedShutDown = true;
forcedState = JobStateInternal.ERROR; forcedState = JobStateInternal.ERROR;
shutDownMessage = "Staging dir does not exist " + stagingDir; shutDownMessage = "Staging dir does not exist " + stagingDir;
@ -275,6 +275,9 @@ public void init(final Configuration conf) {
// what result we will use to notify, and how we will unregister // what result we will use to notify, and how we will unregister
errorHappenedShutDown = true; errorHappenedShutDown = true;
isLastAMRetry = true; isLastAMRetry = true;
LOG.info("Attempt num: " + appAttemptID.getAttemptId() +
" is last retry: " + isLastAMRetry +
" because a commit was started.");
copyHistory = true; copyHistory = true;
if (commitSuccess) { if (commitSuccess) {
shutDownMessage = "We crashed after successfully committing. Recovering."; shutDownMessage = "We crashed after successfully committing. Recovering.";
@ -777,6 +780,10 @@ public TaskAttemptListener getTaskAttemptListener() {
return taskAttemptListener; return taskAttemptListener;
} }
public Boolean isLastAMRetry() {
return isLastAMRetry;
}
/** /**
* By the time life-cycle of this router starts, job-init would have already * By the time life-cycle of this router starts, job-init would have already
* happened. * happened.
@ -1206,6 +1213,8 @@ public static void main(String[] args) {
System.getenv(ApplicationConstants.NM_HTTP_PORT_ENV); System.getenv(ApplicationConstants.NM_HTTP_PORT_ENV);
String appSubmitTimeStr = String appSubmitTimeStr =
System.getenv(ApplicationConstants.APP_SUBMIT_TIME_ENV); System.getenv(ApplicationConstants.APP_SUBMIT_TIME_ENV);
String maxAppAttempts =
System.getenv(ApplicationConstants.MAX_APP_ATTEMPTS_ENV);
validateInputParam(containerIdStr, validateInputParam(containerIdStr,
ApplicationConstants.AM_CONTAINER_ID_ENV); ApplicationConstants.AM_CONTAINER_ID_ENV);
@ -1215,6 +1224,8 @@ public static void main(String[] args) {
ApplicationConstants.NM_HTTP_PORT_ENV); ApplicationConstants.NM_HTTP_PORT_ENV);
validateInputParam(appSubmitTimeStr, validateInputParam(appSubmitTimeStr,
ApplicationConstants.APP_SUBMIT_TIME_ENV); ApplicationConstants.APP_SUBMIT_TIME_ENV);
validateInputParam(maxAppAttempts,
ApplicationConstants.MAX_APP_ATTEMPTS_ENV);
ContainerId containerId = ConverterUtils.toContainerId(containerIdStr); ContainerId containerId = ConverterUtils.toContainerId(containerIdStr);
ApplicationAttemptId applicationAttemptId = ApplicationAttemptId applicationAttemptId =
@ -1224,7 +1235,8 @@ public static void main(String[] args) {
MRAppMaster appMaster = MRAppMaster appMaster =
new MRAppMaster(applicationAttemptId, containerId, nodeHostString, new MRAppMaster(applicationAttemptId, containerId, nodeHostString,
Integer.parseInt(nodePortString), Integer.parseInt(nodePortString),
Integer.parseInt(nodeHttpPortString), appSubmitTime); Integer.parseInt(nodeHttpPortString), appSubmitTime,
Integer.parseInt(maxAppAttempts));
ShutdownHookManager.get().addShutdownHook( ShutdownHookManager.get().addShutdownHook(
new MRAppMasterShutdownHook(appMaster), SHUTDOWN_HOOK_PRIORITY); new MRAppMasterShutdownHook(appMaster), SHUTDOWN_HOOK_PRIORITY);
YarnConfiguration conf = new YarnConfiguration(new JobConf()); YarnConfiguration conf = new YarnConfiguration(new JobConf());

View File

@ -192,7 +192,7 @@ public MRApp(ApplicationAttemptId appAttemptId, ContainerId amContainerId,
int maps, int reduces, boolean autoComplete, String testName, int maps, int reduces, boolean autoComplete, String testName,
boolean cleanOnStart, int startCount, Clock clock) { boolean cleanOnStart, int startCount, Clock clock) {
super(appAttemptId, amContainerId, NM_HOST, NM_PORT, NM_HTTP_PORT, clock, System super(appAttemptId, amContainerId, NM_HOST, NM_PORT, NM_HTTP_PORT, clock, System
.currentTimeMillis()); .currentTimeMillis(), MRJobConfig.DEFAULT_MR_AM_MAX_ATTEMPTS);
this.testWorkDir = new File("target", testName); this.testWorkDir = new File("target", testName);
testAbsPath = new Path(testWorkDir.getAbsolutePath()); testAbsPath = new Path(testWorkDir.getAbsolutePath());
LOG.info("PathUsed: " + testAbsPath); LOG.info("PathUsed: " + testAbsPath);

View File

@ -30,11 +30,15 @@
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.TypeConverter;
import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.JobId;
import org.apache.hadoop.mapreduce.v2.app.client.ClientService; import org.apache.hadoop.mapreduce.v2.app.client.ClientService;
import org.apache.hadoop.mapreduce.v2.app.commit.CommitterEvent;
import org.apache.hadoop.mapreduce.v2.app.commit.CommitterEventHandler;
import org.apache.hadoop.mapreduce.v2.app.job.JobStateInternal; import org.apache.hadoop.mapreduce.v2.app.job.JobStateInternal;
import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocator; import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocator;
import org.apache.hadoop.mapreduce.v2.app.rm.RMHeartbeatHandler;
import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.mapreduce.v2.util.MRApps;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation;
@ -42,6 +46,7 @@
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.ConverterUtils;
import org.junit.Before; import org.junit.Before;
import org.junit.BeforeClass; import org.junit.BeforeClass;
@ -80,7 +85,7 @@ public void testMRAppMasterForDifferentUser() throws IOException,
ContainerId containerId = ConverterUtils.toContainerId(containerIdStr); ContainerId containerId = ConverterUtils.toContainerId(containerIdStr);
MRAppMasterTest appMaster = MRAppMasterTest appMaster =
new MRAppMasterTest(applicationAttemptId, containerId, "host", -1, -1, new MRAppMasterTest(applicationAttemptId, containerId, "host", -1, -1,
System.currentTimeMillis()); System.currentTimeMillis(), MRJobConfig.DEFAULT_MR_AM_MAX_ATTEMPTS);
YarnConfiguration conf = new YarnConfiguration(); YarnConfiguration conf = new YarnConfiguration();
conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir); conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
MRAppMaster.initAndStartAppMaster(appMaster, conf, userName); MRAppMaster.initAndStartAppMaster(appMaster, conf, userName);
@ -109,7 +114,8 @@ public void testMRAppMasterMidLock() throws IOException,
ContainerId containerId = ConverterUtils.toContainerId(containerIdStr); ContainerId containerId = ConverterUtils.toContainerId(containerIdStr);
MRAppMaster appMaster = MRAppMaster appMaster =
new MRAppMasterTest(applicationAttemptId, containerId, "host", -1, -1, new MRAppMasterTest(applicationAttemptId, containerId, "host", -1, -1,
System.currentTimeMillis(), false); System.currentTimeMillis(), MRJobConfig.DEFAULT_MR_AM_MAX_ATTEMPTS,
false, false);
boolean caught = false; boolean caught = false;
try { try {
MRAppMaster.initAndStartAppMaster(appMaster, conf, userName); MRAppMaster.initAndStartAppMaster(appMaster, conf, userName);
@ -144,7 +150,8 @@ public void testMRAppMasterSuccessLock() throws IOException,
ContainerId containerId = ConverterUtils.toContainerId(containerIdStr); ContainerId containerId = ConverterUtils.toContainerId(containerIdStr);
MRAppMaster appMaster = MRAppMaster appMaster =
new MRAppMasterTest(applicationAttemptId, containerId, "host", -1, -1, new MRAppMasterTest(applicationAttemptId, containerId, "host", -1, -1,
System.currentTimeMillis(), false); System.currentTimeMillis(), MRJobConfig.DEFAULT_MR_AM_MAX_ATTEMPTS,
false, false);
boolean caught = false; boolean caught = false;
try { try {
MRAppMaster.initAndStartAppMaster(appMaster, conf, userName); MRAppMaster.initAndStartAppMaster(appMaster, conf, userName);
@ -179,7 +186,8 @@ public void testMRAppMasterFailLock() throws IOException,
ContainerId containerId = ConverterUtils.toContainerId(containerIdStr); ContainerId containerId = ConverterUtils.toContainerId(containerIdStr);
MRAppMaster appMaster = MRAppMaster appMaster =
new MRAppMasterTest(applicationAttemptId, containerId, "host", -1, -1, new MRAppMasterTest(applicationAttemptId, containerId, "host", -1, -1,
System.currentTimeMillis(), false); System.currentTimeMillis(), MRJobConfig.DEFAULT_MR_AM_MAX_ATTEMPTS,
false, false);
boolean caught = false; boolean caught = false;
try { try {
MRAppMaster.initAndStartAppMaster(appMaster, conf, userName); MRAppMaster.initAndStartAppMaster(appMaster, conf, userName);
@ -214,7 +222,8 @@ public void testMRAppMasterMissingStaging() throws IOException,
ContainerId containerId = ConverterUtils.toContainerId(containerIdStr); ContainerId containerId = ConverterUtils.toContainerId(containerIdStr);
MRAppMaster appMaster = MRAppMaster appMaster =
new MRAppMasterTest(applicationAttemptId, containerId, "host", -1, -1, new MRAppMasterTest(applicationAttemptId, containerId, "host", -1, -1,
System.currentTimeMillis(), false); System.currentTimeMillis(), MRJobConfig.DEFAULT_MR_AM_MAX_ATTEMPTS,
false, false);
boolean caught = false; boolean caught = false;
try { try {
MRAppMaster.initAndStartAppMaster(appMaster, conf, userName); MRAppMaster.initAndStartAppMaster(appMaster, conf, userName);
@ -230,36 +239,73 @@ public void testMRAppMasterMissingStaging() throws IOException,
assertEquals(JobStateInternal.ERROR, appMaster.forcedState); assertEquals(JobStateInternal.ERROR, appMaster.forcedState);
appMaster.stop(); appMaster.stop();
} }
@Test (timeout = 30000)
public void testMRAppMasterMaxAppAttempts() throws IOException,
InterruptedException {
int[] maxAppAttemtps = new int[] { 1, 2, 3 };
Boolean[] expectedBools = new Boolean[]{ true, true, false };
String applicationAttemptIdStr = "appattempt_1317529182569_0004_000002";
String containerIdStr = "container_1317529182569_0004_000002_1";
String userName = "TestAppMasterUser";
ApplicationAttemptId applicationAttemptId = ConverterUtils
.toApplicationAttemptId(applicationAttemptIdStr);
ContainerId containerId = ConverterUtils.toContainerId(containerIdStr);
YarnConfiguration conf = new YarnConfiguration();
conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
File stagingDir =
new File(MRApps.getStagingAreaDir(conf, userName).toString());
stagingDir.mkdirs();
for (int i = 0; i < maxAppAttemtps.length; ++i) {
MRAppMasterTest appMaster =
new MRAppMasterTest(applicationAttemptId, containerId, "host", -1, -1,
System.currentTimeMillis(), maxAppAttemtps[i], false, true);
MRAppMaster.initAndStartAppMaster(appMaster, conf, userName);
assertEquals("isLastAMRetry is correctly computed.", expectedBools[i],
appMaster.isLastAMRetry());
}
}
} }
class MRAppMasterTest extends MRAppMaster { class MRAppMasterTest extends MRAppMaster {
Path stagingDirPath; Path stagingDirPath;
private Configuration conf; private Configuration conf;
private boolean overrideInitAndStart; private boolean overrideInit;
private boolean overrideStart;
ContainerAllocator mockContainerAllocator; ContainerAllocator mockContainerAllocator;
CommitterEventHandler mockCommitterEventHandler;
RMHeartbeatHandler mockRMHeartbeatHandler;
public MRAppMasterTest(ApplicationAttemptId applicationAttemptId, public MRAppMasterTest(ApplicationAttemptId applicationAttemptId,
ContainerId containerId, String host, int port, int httpPort, ContainerId containerId, String host, int port, int httpPort,
long submitTime) { long submitTime, int maxAppAttempts) {
this(applicationAttemptId, containerId, host, port, httpPort, submitTime, this(applicationAttemptId, containerId, host, port, httpPort,
true); submitTime, maxAppAttempts, true, true);
} }
public MRAppMasterTest(ApplicationAttemptId applicationAttemptId, public MRAppMasterTest(ApplicationAttemptId applicationAttemptId,
ContainerId containerId, String host, int port, int httpPort, ContainerId containerId, String host, int port, int httpPort,
long submitTime, boolean overrideInitAndStart) { long submitTime, int maxAppAttempts, boolean overrideInit,
super(applicationAttemptId, containerId, host, port, httpPort, submitTime); boolean overrideStart) {
this.overrideInitAndStart = overrideInitAndStart; super(applicationAttemptId, containerId, host, port, httpPort, submitTime,
maxAppAttempts);
this.overrideInit = overrideInit;
this.overrideStart = overrideStart;
mockContainerAllocator = mock(ContainerAllocator.class); mockContainerAllocator = mock(ContainerAllocator.class);
mockCommitterEventHandler = mock(CommitterEventHandler.class);
mockRMHeartbeatHandler = mock(RMHeartbeatHandler.class);
} }
@Override @Override
public void init(Configuration conf) { public void init(Configuration conf) {
if (overrideInitAndStart) { if (!overrideInit) {
this.conf = conf;
} else {
super.init(conf); super.init(conf);
} }
this.conf = conf;
} }
@Override @Override
@ -277,9 +323,20 @@ protected ContainerAllocator createContainerAllocator(
return mockContainerAllocator; return mockContainerAllocator;
} }
@Override
protected EventHandler<CommitterEvent> createCommitterEventHandler(
AppContext context, OutputCommitter committer) {
return mockCommitterEventHandler;
}
@Override
protected RMHeartbeatHandler getRMHeartbeatHandler() {
return mockRMHeartbeatHandler;
}
@Override @Override
public void start() { public void start() {
if (overrideInitAndStart) { if (overrideStart) {
try { try {
String user = UserGroupInformation.getCurrentUser().getShortUserName(); String user = UserGroupInformation.getCurrentUser().getShortUserName();
stagingDirPath = MRApps.getStagingAreaDir(conf, user); stagingDirPath = MRApps.getStagingAreaDir(conf, user);

View File

@ -49,7 +49,6 @@
import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.YarnException;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.service.AbstractService; import org.apache.hadoop.yarn.service.AbstractService;
@ -93,10 +92,9 @@ public void testDeletionofStaging() throws IOException {
verify(fs).delete(stagingJobPath, true); verify(fs).delete(stagingJobPath, true);
} }
@Test @Test (timeout = 30000)
public void testDeletionofStagingOnKill() throws IOException { public void testDeletionofStagingOnKill() throws IOException {
conf.set(MRJobConfig.MAPREDUCE_JOB_DIR, stagingJobDir); conf.set(MRJobConfig.MAPREDUCE_JOB_DIR, stagingJobDir);
conf.setInt(YarnConfiguration.RM_AM_MAX_RETRIES, 4);
fs = mock(FileSystem.class); fs = mock(FileSystem.class);
when(fs.delete(any(Path.class), anyBoolean())).thenReturn(true); when(fs.delete(any(Path.class), anyBoolean())).thenReturn(true);
//Staging Dir exists //Staging Dir exists
@ -113,7 +111,7 @@ public void testDeletionofStagingOnKill() throws IOException {
JobId jobid = recordFactory.newRecordInstance(JobId.class); JobId jobid = recordFactory.newRecordInstance(JobId.class);
jobid.setAppId(appId); jobid.setAppId(appId);
ContainerAllocator mockAlloc = mock(ContainerAllocator.class); ContainerAllocator mockAlloc = mock(ContainerAllocator.class);
MRAppMaster appMaster = new TestMRApp(attemptId, mockAlloc); MRAppMaster appMaster = new TestMRApp(attemptId, mockAlloc, 4);
appMaster.init(conf); appMaster.init(conf);
//simulate the process being killed //simulate the process being killed
MRAppMaster.MRAppMasterShutdownHook hook = MRAppMaster.MRAppMasterShutdownHook hook =
@ -122,10 +120,9 @@ public void testDeletionofStagingOnKill() throws IOException {
verify(fs, times(0)).delete(stagingJobPath, true); verify(fs, times(0)).delete(stagingJobPath, true);
} }
@Test @Test (timeout = 30000)
public void testDeletionofStagingOnKillLastTry() throws IOException { public void testDeletionofStagingOnKillLastTry() throws IOException {
conf.set(MRJobConfig.MAPREDUCE_JOB_DIR, stagingJobDir); conf.set(MRJobConfig.MAPREDUCE_JOB_DIR, stagingJobDir);
conf.setInt(YarnConfiguration.RM_AM_MAX_RETRIES, 1);
fs = mock(FileSystem.class); fs = mock(FileSystem.class);
when(fs.delete(any(Path.class), anyBoolean())).thenReturn(true); when(fs.delete(any(Path.class), anyBoolean())).thenReturn(true);
//Staging Dir exists //Staging Dir exists
@ -142,7 +139,8 @@ public void testDeletionofStagingOnKillLastTry() throws IOException {
JobId jobid = recordFactory.newRecordInstance(JobId.class); JobId jobid = recordFactory.newRecordInstance(JobId.class);
jobid.setAppId(appId); jobid.setAppId(appId);
ContainerAllocator mockAlloc = mock(ContainerAllocator.class); ContainerAllocator mockAlloc = mock(ContainerAllocator.class);
MRAppMaster appMaster = new TestMRApp(attemptId, mockAlloc); MRAppMaster appMaster = new TestMRApp(attemptId, mockAlloc,
MRJobConfig.DEFAULT_MR_AM_MAX_ATTEMPTS);
appMaster.init(conf); appMaster.init(conf);
//simulate the process being killed //simulate the process being killed
MRAppMaster.MRAppMasterShutdownHook hook = MRAppMaster.MRAppMasterShutdownHook hook =
@ -155,15 +153,16 @@ private class TestMRApp extends MRAppMaster {
ContainerAllocator allocator; ContainerAllocator allocator;
public TestMRApp(ApplicationAttemptId applicationAttemptId, public TestMRApp(ApplicationAttemptId applicationAttemptId,
ContainerAllocator allocator) { ContainerAllocator allocator, int maxAppAttempts) {
super(applicationAttemptId, BuilderUtils.newContainerId( super(applicationAttemptId, BuilderUtils.newContainerId(
applicationAttemptId, 1), "testhost", 2222, 3333, System applicationAttemptId, 1), "testhost", 2222, 3333,
.currentTimeMillis()); System.currentTimeMillis(), maxAppAttempts);
this.allocator = allocator; this.allocator = allocator;
} }
public TestMRApp(ApplicationAttemptId applicationAttemptId) { public TestMRApp(ApplicationAttemptId applicationAttemptId) {
this(applicationAttemptId, null); this(applicationAttemptId, null,
MRJobConfig.DEFAULT_MR_AM_MAX_ATTEMPTS);
} }
@Override @Override

View File

@ -664,4 +664,12 @@ public interface MRJobConfig {
public static final String WORKFLOW_ADJACENCY_PREFIX_PATTERN = public static final String WORKFLOW_ADJACENCY_PREFIX_PATTERN =
"^mapreduce\\.workflow\\.adjacency\\..+"; "^mapreduce\\.workflow\\.adjacency\\..+";
/**
* The maximum number of application attempts.
* It is a application-specific setting.
*/
public static final String MR_AM_MAX_ATTEMPTS = "mapreduce.am.max-attempts";
public static final int DEFAULT_MR_AM_MAX_ATTEMPTS = 1;
} }

View File

@ -806,6 +806,14 @@
</description> </description>
</property> </property>
<property>
<name>mapreduce.am.max-attempts</name>
<value>1</value>
<description>The maximum number of application attempts. It is a
application-specific setting. It should not be larger than the global number
set by resourcemanager. Otherwise, it will be override.</description>
</property>
<!-- Job Notification Configuration --> <!-- Job Notification Configuration -->
<property> <property>
<name>mapreduce.job.end-notification.url</name> <name>mapreduce.job.end-notification.url</name>

View File

@ -481,6 +481,9 @@ public ApplicationSubmissionContext createApplicationSubmissionContext(
appContext.setCancelTokensWhenComplete( appContext.setCancelTokensWhenComplete(
conf.getBoolean(MRJobConfig.JOB_CANCEL_DELEGATION_TOKEN, true)); conf.getBoolean(MRJobConfig.JOB_CANCEL_DELEGATION_TOKEN, true));
appContext.setAMContainerSpec(amContainer); // AM Container appContext.setAMContainerSpec(amContainer); // AM Container
appContext.setMaxAppAttempts(
conf.getInt(MRJobConfig.MR_AM_MAX_ATTEMPTS,
MRJobConfig.DEFAULT_MR_AM_MAX_ATTEMPTS));
return appContext; return appContext;
} }