From 07ee20503841a946eaa2e282fbf392efb15e7dd7 Mon Sep 17 00:00:00 2001 From: Arun Murthy Date: Wed, 2 Oct 2013 16:08:45 +0000 Subject: [PATCH] MAPREDUCE-5530. Fix compat with hadoop-1 in mapred.lib.CombinFileInputFormat by re-introducing isSplittable(FileSystem, Path) api and ensuring semantic compatibility. Contributed by Robert Kanter. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528533 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 5 +++ .../mapred/lib/CombineFileInputFormat.java | 36 +++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 303fec6fcb..40852d7e25 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -260,6 +260,11 @@ Release 2.1.2 - UNRELEASED MAPREDUCE-5536. Fixed MR AM and JHS to respect mapreduce.jobhistory.webapp.https.address. (Omkar Vinit Joshi via vinodkv) + MAPREDUCE-5530. Fix compat with hadoop-1 in + mapred.lib.CombinFileInputFormat by re-introducing + isSplittable(FileSystem, Path) api and ensuring semantic compatibility. + (Robert Kanter via acmurthy) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java index 1401fc2970..b9297f851b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java @@ -24,13 +24,19 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressionCodecFactory; +import org.apache.hadoop.io.compress.SplittableCompressionCodec; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.TaskAttemptContext; /** @@ -127,4 +133,34 @@ protected FileStatus[] listStatus(JobConf job) throws IOException { return result.toArray(new FileStatus[result.size()]); } + /** + * Subclasses should avoid overriding this method and should instead only + * override {@link #isSplitable(FileSystem, Path)}. The implementation of + * this method simply calls the other method to preserve compatibility. + * @see + * MAPREDUCE-5530 + * + * @param context the job context + * @param file the file name to check + * @return is this file splitable? + */ + @InterfaceAudience.Private + @Override + protected boolean isSplitable(JobContext context, Path file) { + try { + return isSplitable(FileSystem.get(context.getConfiguration()), file); + } + catch (IOException ioe) { + throw new RuntimeException(ioe); + } + } + + protected boolean isSplitable(FileSystem fs, Path file) { + final CompressionCodec codec = + new CompressionCodecFactory(fs.getConf()).getCodec(file); + if (null == codec) { + return true; + } + return codec instanceof SplittableCompressionCodec; + } }