HADOOP-6315. Avoid incorrect use of BuiltInflater/BuiltInDeflater in

GzipCodec. Contributed by Aaron Kimball


git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@898711 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Christopher Douglas 2010-01-13 10:46:39 +00:00
parent 8942119b04
commit 4cecab7c10
3 changed files with 169 additions and 3 deletions

View File

@ -1295,6 +1295,8 @@ Release 0.20.2 - Unreleased
HADOOP-6460. Reinitializes buffers used for serializing responses in ipc HADOOP-6460. Reinitializes buffers used for serializing responses in ipc
server on exceeding maximum response size to free up Java heap. (suresh) server on exceeding maximum response size to free up Java heap. (suresh)
HADOOP-6315. Avoid incorrect use of BuiltInflater/BuiltInDeflater in
GzipCodec. (Aaron Kimball via cdouglas)
Release 0.20.1 - 2009-09-01 Release 0.20.1 - 2009-09-01

View File

@ -165,7 +165,7 @@ public Compressor createCompressor() {
public Class<? extends Compressor> getCompressorType() { public Class<? extends Compressor> getCompressorType() {
return ZlibFactory.isNativeZlibLoaded(conf) return ZlibFactory.isNativeZlibLoaded(conf)
? GzipZlibCompressor.class ? GzipZlibCompressor.class
: BuiltInZlibDeflater.class; : null;
} }
public CompressionInputStream createInputStream(InputStream in) public CompressionInputStream createInputStream(InputStream in)
@ -196,7 +196,7 @@ public Decompressor createDecompressor() {
public Class<? extends Decompressor> getDecompressorType() { public Class<? extends Decompressor> getDecompressorType() {
return ZlibFactory.isNativeZlibLoaded(conf) return ZlibFactory.isNativeZlibLoaded(conf)
? GzipZlibDecompressor.class ? GzipZlibDecompressor.class
: BuiltInZlibInflater.class; : null;
} }
public String getDefaultExtension() { public String getDefaultExtension() {

View File

@ -19,13 +19,24 @@
import java.io.BufferedInputStream; import java.io.BufferedInputStream;
import java.io.BufferedOutputStream; import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.DataInputStream; import java.io.DataInputStream;
import java.io.DataOutputStream; import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.Arrays; import java.util.Arrays;
import java.util.Random; import java.util.Random;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FSDataOutputStream;
@ -41,6 +52,9 @@
import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.SequenceFile.CompressionType; import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.compress.CompressionOutputStream; import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.CompressorStream;
import org.apache.hadoop.io.compress.zlib.BuiltInZlibDeflater;
import org.apache.hadoop.io.compress.zlib.BuiltInZlibInflater;
import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionLevel; import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionLevel;
import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionStrategy; import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionStrategy;
import org.apache.hadoop.io.compress.zlib.ZlibFactory; import org.apache.hadoop.io.compress.zlib.ZlibFactory;
@ -418,4 +432,154 @@ public static void main(String[] args) {
} }
@Test
public void testCodecPoolAndGzipDecompressor() {
// BuiltInZlibInflater should not be used as the GzipCodec decompressor.
// Assert that this is the case.
// Don't use native libs for this test.
Configuration conf = new Configuration();
conf.setBoolean("hadoop.native.lib", false);
assertFalse("ZlibFactory is using native libs against request",
ZlibFactory.isNativeZlibLoaded(conf));
// This should give us a BuiltInZlibInflater.
Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
assertNotNull("zlibDecompressor is null!", zlibDecompressor);
assertTrue("ZlibFactory returned unexpected inflator",
zlibDecompressor instanceof BuiltInZlibInflater);
// Asking for a decompressor directly from GzipCodec should return null;
// its createOutputStream() just wraps the existing stream in a
// java.util.zip.GZIPOutputStream.
CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
CompressionCodec codec = ccf.getCodec(new Path("foo.gz"));
assertTrue("Codec for .gz file is not GzipCodec", codec instanceof GzipCodec);
Decompressor codecDecompressor = codec.createDecompressor();
if (null != codecDecompressor) {
fail("Got non-null codecDecompressor: " + codecDecompressor);
}
// Asking the CodecPool for a decompressor for GzipCodec
// should return null as well.
Decompressor poolDecompressor = CodecPool.getDecompressor(codec);
if (null != poolDecompressor) {
fail("Got non-null poolDecompressor: " + poolDecompressor);
}
// If we then ensure that the pool is populated...
CodecPool.returnDecompressor(zlibDecompressor);
// Asking the pool another time should still not bind this to GzipCodec.
poolDecompressor = CodecPool.getDecompressor(codec);
if (null != poolDecompressor) {
fail("Second time, got non-null poolDecompressor: "
+ poolDecompressor);
}
}
@Test
public void testGzipCodecRead() throws IOException {
// Create a gzipped file and try to read it back, using a decompressor
// from the CodecPool.
// Don't use native libs for this test.
Configuration conf = new Configuration();
conf.setBoolean("hadoop.native.lib", false);
assertFalse("ZlibFactory is using native libs against request",
ZlibFactory.isNativeZlibLoaded(conf));
// Ensure that the CodecPool has a BuiltInZlibInflater in it.
Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
assertNotNull("zlibDecompressor is null!", zlibDecompressor);
assertTrue("ZlibFactory returned unexpected inflator",
zlibDecompressor instanceof BuiltInZlibInflater);
CodecPool.returnDecompressor(zlibDecompressor);
// Now create a GZip text file.
String tmpDir = System.getProperty("test.build.data", "/tmp/");
Path f = new Path(new Path(tmpDir), "testGzipCodecRead.txt.gz");
BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
new GZIPOutputStream(new FileOutputStream(f.toString()))));
final String msg = "This is the message in the file!";
bw.write(msg);
bw.close();
// Now read it back, using the CodecPool to establish the
// decompressor to use.
CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
CompressionCodec codec = ccf.getCodec(f);
Decompressor decompressor = CodecPool.getDecompressor(codec);
FileSystem fs = FileSystem.getLocal(conf);
InputStream is = fs.open(f);
is = codec.createInputStream(is, decompressor);
BufferedReader br = new BufferedReader(new InputStreamReader(is));
String line = br.readLine();
assertEquals("Didn't get the same message back!", msg, line);
br.close();
}
private void verifyGzipFile(String filename, String msg) throws IOException {
BufferedReader r = new BufferedReader(new InputStreamReader(
new GZIPInputStream(new FileInputStream(filename))));
try {
String line = r.readLine();
assertEquals("Got invalid line back from " + filename, msg, line);
} finally {
r.close();
new File(filename).delete();
}
}
@Test
public void testGzipCodecWrite() throws IOException {
// Create a gzipped file using a compressor from the CodecPool,
// and try to read it back via the regular GZIPInputStream.
// Don't use native libs for this test.
Configuration conf = new Configuration();
conf.setBoolean("hadoop.native.lib", false);
assertFalse("ZlibFactory is using native libs against request",
ZlibFactory.isNativeZlibLoaded(conf));
// Ensure that the CodecPool has a BuiltInZlibDeflater in it.
Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf);
assertNotNull("zlibCompressor is null!", zlibCompressor);
assertTrue("ZlibFactory returned unexpected deflator",
zlibCompressor instanceof BuiltInZlibDeflater);
CodecPool.returnCompressor(zlibCompressor);
// Create a GZIP text file via the Compressor interface.
CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
CompressionCodec codec = ccf.getCodec(new Path("foo.gz"));
assertTrue("Codec for .gz file is not GzipCodec", codec instanceof GzipCodec);
final String msg = "This is the message we are going to compress.";
final String tmpDir = System.getProperty("test.build.data", "/tmp/");
final String fileName = new Path(new Path(tmpDir),
"testGzipCodecWrite.txt.gz").toString();
BufferedWriter w = null;
Compressor gzipCompressor = CodecPool.getCompressor(codec);
if (null != gzipCompressor) {
// If it gives us back a Compressor, we should be able to use this
// to write files we can then read back with Java's gzip tools.
OutputStream os = new CompressorStream(new FileOutputStream(fileName),
gzipCompressor);
w = new BufferedWriter(new OutputStreamWriter(os));
w.write(msg);
w.close();
CodecPool.returnCompressor(gzipCompressor);
verifyGzipFile(fileName, msg);
}
// Create a gzip text file via codec.getOutputStream().
w = new BufferedWriter(new OutputStreamWriter(
codec.createOutputStream(new FileOutputStream(fileName))));
w.write(msg);
w.close();
verifyGzipFile(fileName, msg);
}
} }