diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 1b7f5fa448..961dc88675 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -657,6 +657,9 @@ Release 0.23.5 - UNRELEASED MAPREDUCE-4425. Speculation + Fetch failures can lead to a hung job (jlowe via bobby) + + MAPREDUCE-4786. Job End Notification retry interval is 5 milliseconds by + default (Ravi Prakash via bobby) Release 0.23.4 - UNRELEASED diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/JobEndNotifier.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/JobEndNotifier.java index ca7f625c27..518305f958 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/JobEndNotifier.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/JobEndNotifier.java @@ -53,7 +53,7 @@ public class JobEndNotifier implements Configurable { protected String userUrl; protected String proxyConf; protected int numTries; //Number of tries to attempt notification - protected int waitInterval; //Time to wait between retrying notification + protected int waitInterval; //Time (ms) to wait between retrying notification protected URL urlToNotify; //URL to notify read from the config protected Proxy proxyToUse = Proxy.NO_PROXY; //Proxy to use for notification @@ -71,10 +71,10 @@ public void setConf(Configuration conf) { , conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, 1) ); waitInterval = Math.min( - conf.getInt(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, 5) - , conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, 5) + conf.getInt(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, 5000) + , conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, 5000) ); - waitInterval = (waitInterval < 0) ? 5 : waitInterval; + waitInterval = (waitInterval < 0) ? 5000 : waitInterval; userUrl = conf.get(MRJobConfig.MR_JOB_END_NOTIFICATION_URL); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java index 6d92d0de73..65acc623c3 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java @@ -55,22 +55,22 @@ private void testNumRetries(Configuration conf) { //Test maximum retry interval is capped by //MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL private void testWaitInterval(Configuration conf) { - conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "5"); - conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "1"); + conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "5000"); + conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "1000"); setConf(conf); - Assert.assertTrue("Expected waitInterval to be 1, but was " + waitInterval, - waitInterval == 1); + Assert.assertTrue("Expected waitInterval to be 1000, but was " + + waitInterval, waitInterval == 1000); - conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "10"); + conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "10000"); setConf(conf); - Assert.assertTrue("Expected waitInterval to be 5, but was " + waitInterval, - waitInterval == 5); + Assert.assertTrue("Expected waitInterval to be 5000, but was " + + waitInterval, waitInterval == 5000); //Test negative numbers are set to default conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "-10"); setConf(conf); - Assert.assertTrue("Expected waitInterval to be 5, but was " + waitInterval, - waitInterval == 5); + Assert.assertTrue("Expected waitInterval to be 5000, but was " + + waitInterval, waitInterval == 5000); } private void testProxyConfiguration(Configuration conf) { @@ -125,17 +125,28 @@ protected boolean notifyURLOnce() { public void testNotifyRetries() throws InterruptedException { Configuration conf = new Configuration(); conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_URL, "http://nonexistent"); - conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, "3"); - conf.set(MRJobConfig.MR_JOB_END_RETRY_ATTEMPTS, "3"); - conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "3000"); - conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "3000"); JobReport jobReport = Mockito.mock(JobReport.class); - + long startTime = System.currentTimeMillis(); this.notificationCount = 0; this.setConf(conf); this.notify(jobReport); long endTime = System.currentTimeMillis(); + Assert.assertEquals("Only 1 try was expected but was : " + + this.notificationCount, this.notificationCount, 1); + Assert.assertTrue("Should have taken more than 5 seconds it took " + + (endTime - startTime), endTime - startTime > 5000); + + conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, "3"); + conf.set(MRJobConfig.MR_JOB_END_RETRY_ATTEMPTS, "3"); + conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "3000"); + conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "3000"); + + startTime = System.currentTimeMillis(); + this.notificationCount = 0; + this.setConf(conf); + this.notify(jobReport); + endTime = System.currentTimeMillis(); Assert.assertEquals("Only 3 retries were expected but was : " + this.notificationCount, this.notificationCount, 3); Assert.assertTrue("Should have taken more than 9 seconds it took " diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index 8852d3980b..85330457ae 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -610,36 +610,6 @@ - - - - - - mapreduce.job.end-notification.retry.attempts - 0 - Indicates how many times hadoop should attempt to contact the - notification URL - - - - mapreduce.job.end-notification.retry.interval - 30000 - Indicates time in milliseconds between notification URL retry - calls - - - mapreduce.job.queuename default @@ -802,6 +772,34 @@ + + + mapreduce.job.end-notification.url + + Indicates url which will be called on completion of job to inform + end status of job. + User can give at most 2 variables with URI : $jobId and $jobStatus. + If they are present in URI, then they will be replaced by their + respective values. + + + + + mapreduce.job.end-notification.retry.attempts + 0 + The number of times the submitter of the job wants to retry job + end notification if it fails. This is capped by + mapreduce.job.end-notification.max.attempts + + + + mapreduce.job.end-notification.retry.interval + 1000 + The number of milliseconds the submitter of the job wants to + wait before job end notification is retried if it fails. This is capped by + mapreduce.job.end-notification.max.retry.interval + + mapreduce.job.end-notification.max.attempts 5 @@ -815,36 +813,12 @@ mapreduce.job.end-notification.max.retry.interval - 5 + 5000 true - The maximum amount of time (in seconds) to wait before retrying - job end notification. Cluster administrators can set this to limit how long - the Application Master waits before exiting. Must be marked as final to - prevent users from overriding this. - - - - mapreduce.job.end-notification.url - - The URL to send job end notification. It may contain sentinels - $jobId and $jobStatus which will be replaced with jobId and jobStatus. - - - - - mapreduce.job.end-notification.retry.attempts - 5 - The number of times the submitter of the job wants to retry job - end notification if it fails. This is capped by - mapreduce.job.end-notification.max.attempts - - - - mapreduce.job.end-notification.retry.interval - 1 - The number of seconds the submitter of the job wants to wait - before job end notification is retried if it fails. This is capped by - mapreduce.job.end-notification.max.retry.interval + The maximum amount of time (in milliseconds) to wait before + retrying job end notification. Cluster administrators can set this to + limit how long the Application Master waits before exiting. Must be marked + as final to prevent users from overriding this.