MAPREDUCE-4786. Job End Notification retry interval is 5 milliseconds by default (Ravi Prakash via bobby)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1408444 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7ff7f67a82
commit
cf6ab9b2e7
@ -658,6 +658,9 @@ Release 0.23.5 - UNRELEASED
|
||||
MAPREDUCE-4425. Speculation + Fetch failures can lead to a hung job (jlowe
|
||||
via bobby)
|
||||
|
||||
MAPREDUCE-4786. Job End Notification retry interval is 5 milliseconds by
|
||||
default (Ravi Prakash via bobby)
|
||||
|
||||
Release 0.23.4 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
@ -53,7 +53,7 @@ public class JobEndNotifier implements Configurable {
|
||||
protected String userUrl;
|
||||
protected String proxyConf;
|
||||
protected int numTries; //Number of tries to attempt notification
|
||||
protected int waitInterval; //Time to wait between retrying notification
|
||||
protected int waitInterval; //Time (ms) to wait between retrying notification
|
||||
protected URL urlToNotify; //URL to notify read from the config
|
||||
protected Proxy proxyToUse = Proxy.NO_PROXY; //Proxy to use for notification
|
||||
|
||||
@ -71,10 +71,10 @@ public void setConf(Configuration conf) {
|
||||
, conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, 1)
|
||||
);
|
||||
waitInterval = Math.min(
|
||||
conf.getInt(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, 5)
|
||||
, conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, 5)
|
||||
conf.getInt(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, 5000)
|
||||
, conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, 5000)
|
||||
);
|
||||
waitInterval = (waitInterval < 0) ? 5 : waitInterval;
|
||||
waitInterval = (waitInterval < 0) ? 5000 : waitInterval;
|
||||
|
||||
userUrl = conf.get(MRJobConfig.MR_JOB_END_NOTIFICATION_URL);
|
||||
|
||||
|
@ -55,22 +55,22 @@ private void testNumRetries(Configuration conf) {
|
||||
//Test maximum retry interval is capped by
|
||||
//MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL
|
||||
private void testWaitInterval(Configuration conf) {
|
||||
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "5");
|
||||
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "1");
|
||||
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "5000");
|
||||
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "1000");
|
||||
setConf(conf);
|
||||
Assert.assertTrue("Expected waitInterval to be 1, but was " + waitInterval,
|
||||
waitInterval == 1);
|
||||
Assert.assertTrue("Expected waitInterval to be 1000, but was "
|
||||
+ waitInterval, waitInterval == 1000);
|
||||
|
||||
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "10");
|
||||
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "10000");
|
||||
setConf(conf);
|
||||
Assert.assertTrue("Expected waitInterval to be 5, but was " + waitInterval,
|
||||
waitInterval == 5);
|
||||
Assert.assertTrue("Expected waitInterval to be 5000, but was "
|
||||
+ waitInterval, waitInterval == 5000);
|
||||
|
||||
//Test negative numbers are set to default
|
||||
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "-10");
|
||||
setConf(conf);
|
||||
Assert.assertTrue("Expected waitInterval to be 5, but was " + waitInterval,
|
||||
waitInterval == 5);
|
||||
Assert.assertTrue("Expected waitInterval to be 5000, but was "
|
||||
+ waitInterval, waitInterval == 5000);
|
||||
}
|
||||
|
||||
private void testProxyConfiguration(Configuration conf) {
|
||||
@ -125,10 +125,6 @@ protected boolean notifyURLOnce() {
|
||||
public void testNotifyRetries() throws InterruptedException {
|
||||
Configuration conf = new Configuration();
|
||||
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_URL, "http://nonexistent");
|
||||
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, "3");
|
||||
conf.set(MRJobConfig.MR_JOB_END_RETRY_ATTEMPTS, "3");
|
||||
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "3000");
|
||||
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "3000");
|
||||
JobReport jobReport = Mockito.mock(JobReport.class);
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
@ -136,6 +132,21 @@ public void testNotifyRetries() throws InterruptedException {
|
||||
this.setConf(conf);
|
||||
this.notify(jobReport);
|
||||
long endTime = System.currentTimeMillis();
|
||||
Assert.assertEquals("Only 1 try was expected but was : "
|
||||
+ this.notificationCount, this.notificationCount, 1);
|
||||
Assert.assertTrue("Should have taken more than 5 seconds it took "
|
||||
+ (endTime - startTime), endTime - startTime > 5000);
|
||||
|
||||
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, "3");
|
||||
conf.set(MRJobConfig.MR_JOB_END_RETRY_ATTEMPTS, "3");
|
||||
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "3000");
|
||||
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "3000");
|
||||
|
||||
startTime = System.currentTimeMillis();
|
||||
this.notificationCount = 0;
|
||||
this.setConf(conf);
|
||||
this.notify(jobReport);
|
||||
endTime = System.currentTimeMillis();
|
||||
Assert.assertEquals("Only 3 retries were expected but was : "
|
||||
+ this.notificationCount, this.notificationCount, 3);
|
||||
Assert.assertTrue("Should have taken more than 9 seconds it took "
|
||||
|
@ -610,36 +610,6 @@
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<!-- Job Notification Configuration -->
|
||||
|
||||
<!--
|
||||
<property>
|
||||
<name>mapreduce.job.end-notification.url</name>
|
||||
<value>http://localhost:8080/jobstatus.php?jobId=$jobId&jobStatus=$jobStatus</value>
|
||||
<description>Indicates url which will be called on completion of job to inform
|
||||
end status of job.
|
||||
User can give at most 2 variables with URI : $jobId and $jobStatus.
|
||||
If they are present in URI, then they will be replaced by their
|
||||
respective values.
|
||||
</description>
|
||||
</property>
|
||||
-->
|
||||
|
||||
<property>
|
||||
<name>mapreduce.job.end-notification.retry.attempts</name>
|
||||
<value>0</value>
|
||||
<description>Indicates how many times hadoop should attempt to contact the
|
||||
notification URL </description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>mapreduce.job.end-notification.retry.interval</name>
|
||||
<value>30000</value>
|
||||
<description>Indicates time in milliseconds between notification URL retry
|
||||
calls</description>
|
||||
</property>
|
||||
|
||||
|
||||
<property>
|
||||
<name>mapreduce.job.queuename</name>
|
||||
<value>default</value>
|
||||
@ -802,6 +772,34 @@
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<!-- Job Notification Configuration -->
|
||||
<property>
|
||||
<name>mapreduce.job.end-notification.url</name>
|
||||
<!--<value>http://localhost:8080/jobstatus.php?jobId=$jobId&jobStatus=$jobStatus</value>-->
|
||||
<description>Indicates the URL which will be called on completion of a job to report
|
||||
the end status of the job.
|
||||
The URL may contain at most two variables: $jobId and $jobStatus.
|
||||
If they are present in the URL, they will be replaced by their
|
||||
respective values.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>mapreduce.job.end-notification.retry.attempts</name>
|
||||
<value>0</value>
|
||||
<description>The number of times the submitter of the job wants to retry job
|
||||
end notification if it fails. This is capped by
|
||||
mapreduce.job.end-notification.max.attempts</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>mapreduce.job.end-notification.retry.interval</name>
|
||||
<value>1000</value>
|
||||
<description>The number of milliseconds the submitter of the job wants to
|
||||
wait before job end notification is retried if it fails. This is capped by
|
||||
mapreduce.job.end-notification.max.retry.interval</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>mapreduce.job.end-notification.max.attempts</name>
|
||||
<value>5</value>
|
||||
@ -815,36 +813,12 @@
|
||||
|
||||
<property>
|
||||
<name>mapreduce.job.end-notification.max.retry.interval</name>
|
||||
<value>5</value>
|
||||
<value>5000</value>
|
||||
<final>true</final>
|
||||
<description>The maximum amount of time (in seconds) to wait before retrying
|
||||
job end notification. Cluster administrators can set this to limit how long
|
||||
the Application Master waits before exiting. Must be marked as final to
|
||||
prevent users from overriding this.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>mapreduce.job.end-notification.url</name>
|
||||
<value></value>
|
||||
<description>The URL to send job end notification. It may contain sentinels
|
||||
$jobId and $jobStatus which will be replaced with jobId and jobStatus.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>mapreduce.job.end-notification.retry.attempts</name>
|
||||
<value>5</value>
|
||||
<description>The number of times the submitter of the job wants to retry job
|
||||
end notification if it fails. This is capped by
|
||||
mapreduce.job.end-notification.max.attempts</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>mapreduce.job.end-notification.retry.interval</name>
|
||||
<value>1</value>
|
||||
<description>The number of seconds the submitter of the job wants to wait
|
||||
before job end notification is retried if it fails. This is capped by
|
||||
mapreduce.job.end-notification.max.retry.interval</description>
|
||||
<description>The maximum amount of time (in milliseconds) to wait before
|
||||
retrying job end notification. Cluster administrators can set this to
|
||||
limit how long the Application Master waits before exiting. Must be marked
|
||||
as final to prevent users from overriding this.</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
|
Loading…
Reference in New Issue
Block a user