MAPREDUCE-4786. Job End Notification retry interval is 5 milliseconds by default (Ravi Prakash via bobby)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1408444 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7ff7f67a82
commit
cf6ab9b2e7
@ -658,6 +658,9 @@ Release 0.23.5 - UNRELEASED
|
|||||||
MAPREDUCE-4425. Speculation + Fetch failures can lead to a hung job (jlowe
|
MAPREDUCE-4425. Speculation + Fetch failures can lead to a hung job (jlowe
|
||||||
via bobby)
|
via bobby)
|
||||||
|
|
||||||
|
MAPREDUCE-4786. Job End Notification retry interval is 5 milliseconds by
|
||||||
|
default (Ravi Prakash via bobby)
|
||||||
|
|
||||||
Release 0.23.4 - UNRELEASED
|
Release 0.23.4 - UNRELEASED
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
@ -53,7 +53,7 @@ public class JobEndNotifier implements Configurable {
|
|||||||
protected String userUrl;
|
protected String userUrl;
|
||||||
protected String proxyConf;
|
protected String proxyConf;
|
||||||
protected int numTries; //Number of tries to attempt notification
|
protected int numTries; //Number of tries to attempt notification
|
||||||
protected int waitInterval; //Time to wait between retrying notification
|
protected int waitInterval; //Time (ms) to wait between retrying notification
|
||||||
protected URL urlToNotify; //URL to notify read from the config
|
protected URL urlToNotify; //URL to notify read from the config
|
||||||
protected Proxy proxyToUse = Proxy.NO_PROXY; //Proxy to use for notification
|
protected Proxy proxyToUse = Proxy.NO_PROXY; //Proxy to use for notification
|
||||||
|
|
||||||
@ -71,10 +71,10 @@ public void setConf(Configuration conf) {
|
|||||||
, conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, 1)
|
, conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, 1)
|
||||||
);
|
);
|
||||||
waitInterval = Math.min(
|
waitInterval = Math.min(
|
||||||
conf.getInt(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, 5)
|
conf.getInt(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, 5000)
|
||||||
, conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, 5)
|
, conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, 5000)
|
||||||
);
|
);
|
||||||
waitInterval = (waitInterval < 0) ? 5 : waitInterval;
|
waitInterval = (waitInterval < 0) ? 5000 : waitInterval;
|
||||||
|
|
||||||
userUrl = conf.get(MRJobConfig.MR_JOB_END_NOTIFICATION_URL);
|
userUrl = conf.get(MRJobConfig.MR_JOB_END_NOTIFICATION_URL);
|
||||||
|
|
||||||
|
@ -55,22 +55,22 @@ private void testNumRetries(Configuration conf) {
|
|||||||
//Test maximum retry interval is capped by
|
//Test maximum retry interval is capped by
|
||||||
//MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL
|
//MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL
|
||||||
private void testWaitInterval(Configuration conf) {
|
private void testWaitInterval(Configuration conf) {
|
||||||
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "5");
|
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "5000");
|
||||||
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "1");
|
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "1000");
|
||||||
setConf(conf);
|
setConf(conf);
|
||||||
Assert.assertTrue("Expected waitInterval to be 1, but was " + waitInterval,
|
Assert.assertTrue("Expected waitInterval to be 1000, but was "
|
||||||
waitInterval == 1);
|
+ waitInterval, waitInterval == 1000);
|
||||||
|
|
||||||
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "10");
|
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "10000");
|
||||||
setConf(conf);
|
setConf(conf);
|
||||||
Assert.assertTrue("Expected waitInterval to be 5, but was " + waitInterval,
|
Assert.assertTrue("Expected waitInterval to be 5000, but was "
|
||||||
waitInterval == 5);
|
+ waitInterval, waitInterval == 5000);
|
||||||
|
|
||||||
//Test negative numbers are set to default
|
//Test negative numbers are set to default
|
||||||
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "-10");
|
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "-10");
|
||||||
setConf(conf);
|
setConf(conf);
|
||||||
Assert.assertTrue("Expected waitInterval to be 5, but was " + waitInterval,
|
Assert.assertTrue("Expected waitInterval to be 5000, but was "
|
||||||
waitInterval == 5);
|
+ waitInterval, waitInterval == 5000);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void testProxyConfiguration(Configuration conf) {
|
private void testProxyConfiguration(Configuration conf) {
|
||||||
@ -125,10 +125,6 @@ protected boolean notifyURLOnce() {
|
|||||||
public void testNotifyRetries() throws InterruptedException {
|
public void testNotifyRetries() throws InterruptedException {
|
||||||
Configuration conf = new Configuration();
|
Configuration conf = new Configuration();
|
||||||
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_URL, "http://nonexistent");
|
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_URL, "http://nonexistent");
|
||||||
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, "3");
|
|
||||||
conf.set(MRJobConfig.MR_JOB_END_RETRY_ATTEMPTS, "3");
|
|
||||||
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "3000");
|
|
||||||
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "3000");
|
|
||||||
JobReport jobReport = Mockito.mock(JobReport.class);
|
JobReport jobReport = Mockito.mock(JobReport.class);
|
||||||
|
|
||||||
long startTime = System.currentTimeMillis();
|
long startTime = System.currentTimeMillis();
|
||||||
@ -136,6 +132,21 @@ public void testNotifyRetries() throws InterruptedException {
|
|||||||
this.setConf(conf);
|
this.setConf(conf);
|
||||||
this.notify(jobReport);
|
this.notify(jobReport);
|
||||||
long endTime = System.currentTimeMillis();
|
long endTime = System.currentTimeMillis();
|
||||||
|
Assert.assertEquals("Only 1 try was expected but was : "
|
||||||
|
+ this.notificationCount, this.notificationCount, 1);
|
||||||
|
Assert.assertTrue("Should have taken more than 5 seconds it took "
|
||||||
|
+ (endTime - startTime), endTime - startTime > 5000);
|
||||||
|
|
||||||
|
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, "3");
|
||||||
|
conf.set(MRJobConfig.MR_JOB_END_RETRY_ATTEMPTS, "3");
|
||||||
|
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "3000");
|
||||||
|
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "3000");
|
||||||
|
|
||||||
|
startTime = System.currentTimeMillis();
|
||||||
|
this.notificationCount = 0;
|
||||||
|
this.setConf(conf);
|
||||||
|
this.notify(jobReport);
|
||||||
|
endTime = System.currentTimeMillis();
|
||||||
Assert.assertEquals("Only 3 retries were expected but was : "
|
Assert.assertEquals("Only 3 retries were expected but was : "
|
||||||
+ this.notificationCount, this.notificationCount, 3);
|
+ this.notificationCount, this.notificationCount, 3);
|
||||||
Assert.assertTrue("Should have taken more than 9 seconds it took "
|
Assert.assertTrue("Should have taken more than 9 seconds it took "
|
||||||
|
@ -610,36 +610,6 @@
|
|||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<!-- Job Notification Configuration -->
|
|
||||||
|
|
||||||
<!--
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.job.end-notification.url</name>
|
|
||||||
<value>http://localhost:8080/jobstatus.php?jobId=$jobId&jobStatus=$jobStatus</value>
|
|
||||||
<description>Indicates url which will be called on completion of job to inform
|
|
||||||
end status of job.
|
|
||||||
User can give at most 2 variables with URI : $jobId and $jobStatus.
|
|
||||||
If they are present in URI, then they will be replaced by their
|
|
||||||
respective values.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
-->
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.job.end-notification.retry.attempts</name>
|
|
||||||
<value>0</value>
|
|
||||||
<description>Indicates how many times hadoop should attempt to contact the
|
|
||||||
notification URL </description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.job.end-notification.retry.interval</name>
|
|
||||||
<value>30000</value>
|
|
||||||
<description>Indicates time in milliseconds between notification URL retry
|
|
||||||
calls</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>mapreduce.job.queuename</name>
|
<name>mapreduce.job.queuename</name>
|
||||||
<value>default</value>
|
<value>default</value>
|
||||||
@ -802,6 +772,34 @@
|
|||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<!-- Job Notification Configuration -->
|
||||||
|
<property>
|
||||||
|
<name>mapreduce.job.end-notification.url</name>
|
||||||
|
<!--<value>http://localhost:8080/jobstatus.php?jobId=$jobId&jobStatus=$jobStatus</value>-->
|
||||||
|
<description>Indicates url which will be called on completion of job to inform
|
||||||
|
end status of job.
|
||||||
|
User can give at most 2 variables with URI : $jobId and $jobStatus.
|
||||||
|
If they are present in URI, then they will be replaced by their
|
||||||
|
respective values.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>mapreduce.job.end-notification.retry.attempts</name>
|
||||||
|
<value>0</value>
|
||||||
|
<description>The number of times the submitter of the job wants to retry job
|
||||||
|
end notification if it fails. This is capped by
|
||||||
|
mapreduce.job.end-notification.max.attempts</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>mapreduce.job.end-notification.retry.interval</name>
|
||||||
|
<value>1000</value>
|
||||||
|
<description>The number of milliseconds the submitter of the job wants to
|
||||||
|
wait before job end notification is retried if it fails. This is capped by
|
||||||
|
mapreduce.job.end-notification.max.retry.interval</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>mapreduce.job.end-notification.max.attempts</name>
|
<name>mapreduce.job.end-notification.max.attempts</name>
|
||||||
<value>5</value>
|
<value>5</value>
|
||||||
@ -815,36 +813,12 @@
|
|||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>mapreduce.job.end-notification.max.retry.interval</name>
|
<name>mapreduce.job.end-notification.max.retry.interval</name>
|
||||||
<value>5</value>
|
<value>5000</value>
|
||||||
<final>true</final>
|
<final>true</final>
|
||||||
<description>The maximum amount of time (in seconds) to wait before retrying
|
<description>The maximum amount of time (in milliseconds) to wait before
|
||||||
job end notification. Cluster administrators can set this to limit how long
|
retrying job end notification. Cluster administrators can set this to
|
||||||
the Application Master waits before exiting. Must be marked as final to
|
limit how long the Application Master waits before exiting. Must be marked
|
||||||
prevent users from overriding this.</description>
|
as final to prevent users from overriding this.</description>
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.job.end-notification.url</name>
|
|
||||||
<value></value>
|
|
||||||
<description>The URL to send job end notification. It may contain sentinels
|
|
||||||
$jobId and $jobStatus which will be replaced with jobId and jobStatus.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.job.end-notification.retry.attempts</name>
|
|
||||||
<value>5</value>
|
|
||||||
<description>The number of times the submitter of the job wants to retry job
|
|
||||||
end notification if it fails. This is capped by
|
|
||||||
mapreduce.job.end-notification.max.attempts</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.job.end-notification.retry.interval</name>
|
|
||||||
<value>1</value>
|
|
||||||
<description>The number of seconds the submitter of the job wants to wait
|
|
||||||
before job end notification is retried if it fails. This is capped by
|
|
||||||
mapreduce.job.end-notification.max.retry.interval</description>
|
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
|
Loading…
Reference in New Issue
Block a user