From 5795fcfd9904431ec075fdce7ab8559ff50eccd2 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Sat, 22 Oct 2011 06:14:46 +0000 Subject: [PATCH] MAPREDUCE-3058. Fixed MR YarnChild to report failure when task throws an error and thus prevent a hanging task and job. (vinodkv) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1187654 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../java/org/apache/hadoop/mapred/YarnChild.java | 2 +- .../java/org/apache/hadoop/FailingMapper.java | 16 ++++++++++++++++ .../apache/hadoop/mapreduce/v2/TestMRJobs.java | 1 - .../apache/hadoop/mapred/gridmix/LoadJob.java | 2 ++ 5 files changed, 22 insertions(+), 2 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 6f4c7a3553..b29e5fb3d8 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -1729,6 +1729,9 @@ Release 0.23.0 - Unreleased MAPREDUCE-3242. Trunk compilation broken with bad interaction from MAPREDUCE-3070 and MAPREDUCE-3239. (mahadev) + MAPREDUCE-3058. Fixed MR YarnChild to report failure when task throws an + error and thus prevent a hanging task and job. (vinodkv) + Release 0.22.0 - Unreleased INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java index 0ab220bf38..60e7418b68 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java @@ -177,7 +177,7 @@ public Object run() throws Exception { ByteArrayOutputStream baos = new ByteArrayOutputStream(); exception.printStackTrace(new PrintStream(baos)); if (taskid != null) { - umbilical.reportDiagnosticInfo(taskid, baos.toString()); + umbilical.fatalError(taskid, baos.toString()); } } catch (Throwable throwable) { LOG.fatal("Error running child : " diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/FailingMapper.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/FailingMapper.java index e9502b1357..33a60681a3 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/FailingMapper.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/FailingMapper.java @@ -30,6 +30,22 @@ public class FailingMapper extends Mapper { public void map(Text key, Text value, Context context) throws IOException,InterruptedException { + + // Just create a non-daemon thread which hangs forever. MR AM should not be + // hung by this. + new Thread() { + @Override + public void run() { + synchronized (this) { + try { + wait(); + } catch (InterruptedException e) { + // + } + } + } + }.start(); + if (context.getTaskAttemptID().getId() == 0) { System.out.println("Attempt:" + context.getTaskAttemptID() + " Failing mapper throwing exception"); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java index 50819f3537..d26a441c7a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java @@ -299,7 +299,6 @@ protected Job runFailingMapperJob() throws IOException, InterruptedException, ClassNotFoundException { Configuration myConf = new Configuration(mrCluster.getConfig()); myConf.setInt(MRJobConfig.NUM_MAPS, 1); - myConf.setInt("mapreduce.task.timeout", 10*1000);//reduce the timeout myConf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2); //reduce the number of attempts Job job = new Job(myConf); diff --git a/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/LoadJob.java b/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/LoadJob.java index d0d2f9183f..ae2c8814f2 100644 --- a/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/LoadJob.java +++ b/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/LoadJob.java @@ -351,9 +351,11 @@ protected void setup(Context ctxt) matcher = new ResourceUsageMatcherRunner(ctxt, split.getMapResourceUsageMetrics()); + matcher.setDaemon(true); // start the status reporter thread reporter = new StatusReporter(ctxt, matcher); + reporter.setDaemon(true); reporter.start(); }