From 07ee20503841a946eaa2e282fbf392efb15e7dd7 Mon Sep 17 00:00:00 2001
From: Arun Murthy <acmurthy@apache.org>
Date: Wed, 2 Oct 2013 16:08:45 +0000
Subject: [PATCH] MAPREDUCE-5530. Fix compat with hadoop-1 in
 mapred.lib.CombinFileInputFormat by re-introducing isSplittable(FileSystem,
 Path) api and ensuring semantic compatibility. Contributed by Robert Kanter.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528533 13f79535-47bb-0310-9956-ffa450edef68
---
 hadoop-mapreduce-project/CHANGES.txt          |  5 +++
 .../mapred/lib/CombineFileInputFormat.java    | 36 +++++++++++++++++++
 2 files changed, 41 insertions(+)
diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index 303fec6fcb..40852d7e25 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -260,6 +260,11 @@ Release 2.1.2 - UNRELEASED
     MAPREDUCE-5536. Fixed MR AM and JHS to respect
     mapreduce.jobhistory.webapp.https.address. (Omkar Vinit Joshi via vinodkv)
 
+    MAPREDUCE-5530. Fix compat with hadoop-1 in
+    mapred.lib.CombinFileInputFormat by re-introducing
+    isSplittable(FileSystem, Path) api and ensuring semantic compatibility.
+    (Robert Kanter via acmurthy)
+
 Release 2.1.1-beta - 2013-09-23
 
   INCOMPATIBLE CHANGES
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java
index 1401fc2970..b9297f851b 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java
@@ -24,13 +24,19 @@
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.apache.hadoop.io.compress.SplittableCompressionCodec;
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 
 /**
@@ -127,4 +133,34 @@ protected FileStatus[] listStatus(JobConf job) throws IOException {
     return result.toArray(new FileStatus[result.size()]);
   }
 
+  /**
+   * Subclasses should avoid overriding this method and should instead only
+   * override {@link #isSplitable(FileSystem, Path)}.  The implementation of
+   * this method simply calls the other method to preserve compatibility.
+   * @see <a href="https://issues.apache.org/jira/browse/MAPREDUCE-5530">
+   * MAPREDUCE-5530</a>
+   *
+   * @param context the job context
+   * @param file the file name to check
+   * @return is this file splitable?
+   */
+  @InterfaceAudience.Private
+  @Override
+  protected boolean isSplitable(JobContext context, Path file) {
+    try {
+      return isSplitable(FileSystem.get(context.getConfiguration()), file);
+    }
+    catch (IOException ioe) {
+      throw new RuntimeException(ioe);
+    }
+  }
+
+  protected boolean isSplitable(FileSystem fs, Path file) {
+    final CompressionCodec codec =
+      new CompressionCodecFactory(fs.getConf()).getCodec(file);
+    if (null == codec) {
+      return true;
+    }
+    return codec instanceof SplittableCompressionCodec;
+  }
 }