HADOOP-18029: Update CompressionCodecFactory to handle uppercase file extensions (#3739)
Co-authored-by: Desmond Sisson <sissonde@amazon.com>
This commit is contained in:
parent
4dea4a7b67
commit
df4197592f
@@ -57,7 +57,7 @@ public class CompressionCodecFactory {
|
||||
private Map<String, CompressionCodec> codecsByName = null;
|
||||
|
||||
/**
|
||||
* A map from class names to the codecs
|
||||
* A map from class names to the codecs.
|
||||
*/
|
||||
private HashMap<String, CompressionCodec> codecsByClassName = null;
|
||||
|
||||
@@ -110,8 +110,8 @@ public String toString() {
|
||||
*/
|
||||
public static List<Class<? extends CompressionCodec>> getCodecClasses(
|
||||
Configuration conf) {
|
||||
List<Class<? extends CompressionCodec>> result
|
||||
= new ArrayList<Class<? extends CompressionCodec>>();
|
||||
List<Class<? extends CompressionCodec>> result =
|
||||
new ArrayList<Class<? extends CompressionCodec>>();
|
||||
// Add codec classes discovered via service loading
|
||||
synchronized (CODEC_PROVIDERS) {
|
||||
// CODEC_PROVIDERS is a lazy collection. Synchronize so it is
|
||||
@@ -200,11 +200,13 @@ public CompressionCodec getCodec(Path file) {
|
||||
String filename = file.getName();
|
||||
String reversedFilename =
|
||||
new StringBuilder(filename).reverse().toString();
|
||||
String lowerReversedFilename =
|
||||
StringUtils.toLowerCase(reversedFilename);
|
||||
SortedMap<String, CompressionCodec> subMap =
|
||||
codecs.headMap(reversedFilename);
|
||||
codecs.headMap(lowerReversedFilename);
|
||||
if (!subMap.isEmpty()) {
|
||||
String potentialSuffix = subMap.lastKey();
|
||||
if (reversedFilename.startsWith(potentialSuffix)) {
|
||||
if (lowerReversedFilename.startsWith(potentialSuffix)) {
|
||||
result = codecs.get(potentialSuffix);
|
||||
}
|
||||
}
|
||||
@@ -323,8 +325,12 @@ public static void main(String[] args) throws Exception {
|
||||
len = in.read(buffer);
|
||||
}
|
||||
} finally {
|
||||
if(out != null) { out.close(); }
|
||||
if(in != null) { in.close(); }
|
||||
if(out != null) {
|
||||
out.close();
|
||||
}
|
||||
if(in != null) {
|
||||
in.close();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
CompressionInputStream in = null;
|
||||
@@ -338,7 +344,9 @@ public static void main(String[] args) throws Exception {
|
||||
len = in.read(buffer);
|
||||
}
|
||||
} finally {
|
||||
if(in != null) { in.close(); }
|
||||
if(in != null) {
|
||||
in.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -29,6 +29,7 @@
|
||||
|
||||
import org.junit.Test;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNull;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
public class TestCodecFactory {
|
||||
@@ -125,7 +126,7 @@ public String getDefaultExtension() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a factory for a given set of codecs
|
||||
* Returns a factory for a given set of codecs.
|
||||
* @param classes the codec classes to include
|
||||
* @return a new factory
|
||||
*/
|
||||
@@ -137,10 +138,16 @@ private static CompressionCodecFactory setClasses(Class[] classes) {
|
||||
|
||||
private static void checkCodec(String msg,
|
||||
Class expected, CompressionCodec actual) {
|
||||
if (expected == null) {
|
||||
assertNull(msg, actual);
|
||||
} else if (actual == null) {
|
||||
fail(msg + " result was null");
|
||||
} else {
|
||||
assertEquals(msg + " unexpected codec found",
|
||||
expected.getName(),
|
||||
actual.getClass().getName());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFinding() {
|
||||
@@ -153,6 +160,8 @@ public void testFinding() {
|
||||
|
||||
codec = factory.getCodec(new Path("/tmp/foo.gz"));
|
||||
checkCodec("default factory for .gz", GzipCodec.class, codec);
|
||||
codec = factory.getCodec(new Path("/tmp/foo.GZ"));
|
||||
checkCodec("default factory for .GZ", GzipCodec.class, codec);
|
||||
codec = factory.getCodecByClassName(GzipCodec.class.getCanonicalName());
|
||||
checkCodec("default factory for gzip codec", GzipCodec.class, codec);
|
||||
codec = factory.getCodecByName("gzip");
|
||||
@@ -168,6 +177,8 @@ public void testFinding() {
|
||||
|
||||
codec = factory.getCodec(new Path("/tmp/foo.bz2"));
|
||||
checkCodec("default factory for .bz2", BZip2Codec.class, codec);
|
||||
codec = factory.getCodec(new Path("/tmp/foo.BZ2"));
|
||||
checkCodec("default factory for .BZ2", BZip2Codec.class, codec);
|
||||
codec = factory.getCodecByClassName(BZip2Codec.class.getCanonicalName());
|
||||
checkCodec("default factory for bzip2 codec", BZip2Codec.class, codec);
|
||||
codec = factory.getCodecByName("bzip2");
|
||||
@@ -221,16 +232,22 @@ public void testFinding() {
|
||||
FooBarCodec.class});
|
||||
codec = factory.getCodec(new Path("/tmp/.foo.bar.gz"));
|
||||
checkCodec("full factory gz codec", GzipCodec.class, codec);
|
||||
codec = factory.getCodec(new Path("/tmp/.foo.bar.GZ"));
|
||||
checkCodec("full factory GZ codec", GzipCodec.class, codec);
|
||||
codec = factory.getCodecByClassName(GzipCodec.class.getCanonicalName());
|
||||
checkCodec("full codec gz codec", GzipCodec.class, codec);
|
||||
|
||||
codec = factory.getCodec(new Path("/tmp/foo.bz2"));
|
||||
checkCodec("full factory for .bz2", BZip2Codec.class, codec);
|
||||
codec = factory.getCodec(new Path("/tmp/foo.BZ2"));
|
||||
checkCodec("full factory for .BZ2", BZip2Codec.class, codec);
|
||||
codec = factory.getCodecByClassName(BZip2Codec.class.getCanonicalName());
|
||||
checkCodec("full codec bzip2 codec", BZip2Codec.class, codec);
|
||||
|
||||
codec = factory.getCodec(new Path("/tmp/foo.bar"));
|
||||
checkCodec("full factory bar codec", BarCodec.class, codec);
|
||||
codec = factory.getCodec(new Path("/tmp/foo.BAR"));
|
||||
checkCodec("full factory BAR codec", BarCodec.class, codec);
|
||||
codec = factory.getCodecByClassName(BarCodec.class.getCanonicalName());
|
||||
checkCodec("full factory bar codec", BarCodec.class, codec);
|
||||
codec = factory.getCodecByName("bar");
|
||||
@@ -240,6 +257,8 @@ public void testFinding() {
|
||||
|
||||
codec = factory.getCodec(new Path("/tmp/foo/baz.foo.bar"));
|
||||
checkCodec("full factory foo bar codec", FooBarCodec.class, codec);
|
||||
codec = factory.getCodec(new Path("/tmp/foo/baz.FOO.bar"));
|
||||
checkCodec("full factory FOO bar codec", FooBarCodec.class, codec);
|
||||
codec = factory.getCodecByClassName(FooBarCodec.class.getCanonicalName());
|
||||
checkCodec("full factory foo bar codec", FooBarCodec.class, codec);
|
||||
codec = factory.getCodecByName("foobar");
|
||||
@@ -249,6 +268,8 @@ public void testFinding() {
|
||||
|
||||
codec = factory.getCodec(new Path("/tmp/foo.foo"));
|
||||
checkCodec("full factory foo codec", FooCodec.class, codec);
|
||||
codec = factory.getCodec(new Path("/tmp/FOO.FOO"));
|
||||
checkCodec("full factory FOO codec", FooCodec.class, codec);
|
||||
codec = factory.getCodecByClassName(FooCodec.class.getCanonicalName());
|
||||
checkCodec("full factory foo codec", FooCodec.class, codec);
|
||||
codec = factory.getCodecByName("foo");
|
||||
@@ -259,6 +280,8 @@ public void testFinding() {
|
||||
factory = setClasses(new Class[]{NewGzipCodec.class});
|
||||
codec = factory.getCodec(new Path("/tmp/foo.gz"));
|
||||
checkCodec("overridden factory for .gz", NewGzipCodec.class, codec);
|
||||
codec = factory.getCodec(new Path("/tmp/foo.GZ"));
|
||||
checkCodec("overridden factory for .GZ", NewGzipCodec.class, codec);
|
||||
codec = factory.getCodecByClassName(NewGzipCodec.class.getCanonicalName());
|
||||
checkCodec("overridden factory for gzip codec", NewGzipCodec.class, codec);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user