MAPREDUCE-4786. Job End Notification retry interval is 5 milliseconds by default (Ravi Prakash via bobby)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1408444 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Joseph Evans 2012-11-12 20:32:04 +00:00
parent 7ff7f67a82
commit cf6ab9b2e7
4 changed files with 65 additions and 77 deletions

View File

@ -658,6 +658,9 @@ Release 0.23.5 - UNRELEASED
MAPREDUCE-4425. Speculation + Fetch failures can lead to a hung job (jlowe
via bobby)
MAPREDUCE-4786. Job End Notification retry interval is 5 milliseconds by
default (Ravi Prakash via bobby)
Release 0.23.4 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -53,7 +53,7 @@ public class JobEndNotifier implements Configurable {
protected String userUrl;
protected String proxyConf;
protected int numTries; //Number of tries to attempt notification
protected int waitInterval; //Time to wait between retrying notification
protected int waitInterval; //Time (ms) to wait between retrying notification
protected URL urlToNotify; //URL to notify read from the config
protected Proxy proxyToUse = Proxy.NO_PROXY; //Proxy to use for notification
@ -71,10 +71,10 @@ public void setConf(Configuration conf) {
, conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, 1)
);
waitInterval = Math.min(
conf.getInt(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, 5)
, conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, 5)
conf.getInt(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, 5000)
, conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, 5000)
);
waitInterval = (waitInterval < 0) ? 5 : waitInterval;
waitInterval = (waitInterval < 0) ? 5000 : waitInterval;
userUrl = conf.get(MRJobConfig.MR_JOB_END_NOTIFICATION_URL);

View File

@ -55,22 +55,22 @@ private void testNumRetries(Configuration conf) {
//Test maximum retry interval is capped by
//MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL
private void testWaitInterval(Configuration conf) {
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "5");
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "1");
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "5000");
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "1000");
setConf(conf);
Assert.assertTrue("Expected waitInterval to be 1, but was " + waitInterval,
waitInterval == 1);
Assert.assertTrue("Expected waitInterval to be 1000, but was "
+ waitInterval, waitInterval == 1000);
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "10");
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "10000");
setConf(conf);
Assert.assertTrue("Expected waitInterval to be 5, but was " + waitInterval,
waitInterval == 5);
Assert.assertTrue("Expected waitInterval to be 5000, but was "
+ waitInterval, waitInterval == 5000);
//Test negative numbers are set to default
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "-10");
setConf(conf);
Assert.assertTrue("Expected waitInterval to be 5, but was " + waitInterval,
waitInterval == 5);
Assert.assertTrue("Expected waitInterval to be 5000, but was "
+ waitInterval, waitInterval == 5000);
}
private void testProxyConfiguration(Configuration conf) {
@ -125,10 +125,6 @@ protected boolean notifyURLOnce() {
public void testNotifyRetries() throws InterruptedException {
Configuration conf = new Configuration();
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_URL, "http://nonexistent");
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, "3");
conf.set(MRJobConfig.MR_JOB_END_RETRY_ATTEMPTS, "3");
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "3000");
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "3000");
JobReport jobReport = Mockito.mock(JobReport.class);
long startTime = System.currentTimeMillis();
@ -136,6 +132,21 @@ public void testNotifyRetries() throws InterruptedException {
this.setConf(conf);
this.notify(jobReport);
long endTime = System.currentTimeMillis();
Assert.assertEquals("Only 1 try was expected but was : "
+ this.notificationCount, this.notificationCount, 1);
Assert.assertTrue("Should have taken more than 5 seconds it took "
+ (endTime - startTime), endTime - startTime > 5000);
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, "3");
conf.set(MRJobConfig.MR_JOB_END_RETRY_ATTEMPTS, "3");
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "3000");
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "3000");
startTime = System.currentTimeMillis();
this.notificationCount = 0;
this.setConf(conf);
this.notify(jobReport);
endTime = System.currentTimeMillis();
Assert.assertEquals("Only 3 retries were expected but was : "
+ this.notificationCount, this.notificationCount, 3);
Assert.assertTrue("Should have taken more than 9 seconds it took "

View File

@ -610,36 +610,6 @@
</description>
</property>
<!-- Job Notification Configuration -->
<!--
<property>
<name>mapreduce.job.end-notification.url</name>
<value>http://localhost:8080/jobstatus.php?jobId=$jobId&amp;jobStatus=$jobStatus</value>
<description>Indicates url which will be called on completion of job to inform
end status of job.
User can give at most 2 variables with URI : $jobId and $jobStatus.
If they are present in URI, then they will be replaced by their
respective values.
</description>
</property>
-->
<property>
<name>mapreduce.job.end-notification.retry.attempts</name>
<value>0</value>
<description>Indicates how many times hadoop should attempt to contact the
notification URL </description>
</property>
<property>
<name>mapreduce.job.end-notification.retry.interval</name>
<value>30000</value>
<description>Indicates time in milliseconds between notification URL retry
calls</description>
</property>
<property>
<name>mapreduce.job.queuename</name>
<value>default</value>
@ -802,6 +772,34 @@
</description>
</property>
<!-- Job Notification Configuration -->
<property>
<name>mapreduce.job.end-notification.url</name>
<!--<value>http://localhost:8080/jobstatus.php?jobId=$jobId&amp;jobStatus=$jobStatus</value>-->
<description>Indicates url which will be called on completion of job to inform
end status of job.
User can give at most 2 variables with URI : $jobId and $jobStatus.
If they are present in URI, then they will be replaced by their
respective values.
</description>
</property>
<property>
<name>mapreduce.job.end-notification.retry.attempts</name>
<value>0</value>
<description>The number of times the submitter of the job wants to retry job
end notification if it fails. This is capped by
mapreduce.job.end-notification.max.attempts</description>
</property>
<property>
<name>mapreduce.job.end-notification.retry.interval</name>
<value>1000</value>
<description>The number of milliseconds the submitter of the job wants to
wait before job end notification is retried if it fails. This is capped by
mapreduce.job.end-notification.max.retry.interval</description>
</property>
<property>
<name>mapreduce.job.end-notification.max.attempts</name>
<value>5</value>
@ -815,36 +813,12 @@
<property>
<name>mapreduce.job.end-notification.max.retry.interval</name>
<value>5</value>
<value>5000</value>
<final>true</final>
<description>The maximum amount of time (in seconds) to wait before retrying
job end notification. Cluster administrators can set this to limit how long
the Application Master waits before exiting. Must be marked as final to
prevent users from overriding this.</description>
</property>
<property>
<name>mapreduce.job.end-notification.url</name>
<value></value>
<description>The URL to send job end notification. It may contain sentinels
$jobId and $jobStatus which will be replaced with jobId and jobStatus.
</description>
</property>
<property>
<name>mapreduce.job.end-notification.retry.attempts</name>
<value>5</value>
<description>The number of times the submitter of the job wants to retry job
end notification if it fails. This is capped by
mapreduce.job.end-notification.max.attempts</description>
</property>
<property>
<name>mapreduce.job.end-notification.retry.interval</name>
<value>1</value>
<description>The number of seconds the submitter of the job wants to wait
before job end notification is retried if it fails. This is capped by
mapreduce.job.end-notification.max.retry.interval</description>
<description>The maximum amount of time (in milliseconds) to wait before
retrying job end notification. Cluster administrators can set this to
limit how long the Application Master waits before exiting. Must be marked
as final to prevent users from overriding this.</description>
</property>
<property>