HADOOP-6315. Avoid incorrect use of BuiltInflater/BuiltInDeflater in

GzipCodec. Contributed by Aaron Kimball


git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@898711 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Christopher Douglas 2010-01-13 10:46:39 +00:00
parent 8942119b04
commit 4cecab7c10
3 changed files with 169 additions and 3 deletions

View File

@ -1295,6 +1295,8 @@ Release 0.20.2 - Unreleased
HADOOP-6460. Reinitializes buffers used for serializing responses in ipc
server on exceeding maximum response size to free up Java heap. (suresh)
HADOOP-6315. Avoid incorrect use of BuiltInflater/BuiltInDeflater in
GzipCodec. (Aaron Kimball via cdouglas)
Release 0.20.1 - 2009-09-01

View File

@ -165,7 +165,7 @@ public Compressor createCompressor() {
public Class<? extends Compressor> getCompressorType() {
return ZlibFactory.isNativeZlibLoaded(conf)
? GzipZlibCompressor.class
: BuiltInZlibDeflater.class;
: null;
}
public CompressionInputStream createInputStream(InputStream in)
@ -196,7 +196,7 @@ public Decompressor createDecompressor() {
public Class<? extends Decompressor> getDecompressorType() {
return ZlibFactory.isNativeZlibLoaded(conf)
? GzipZlibDecompressor.class
: BuiltInZlibInflater.class;
: null;
}
public String getDefaultExtension() {

View File

@ -19,13 +19,24 @@
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.Arrays;
import java.util.Random;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
@ -41,6 +52,9 @@
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.CompressorStream;
import org.apache.hadoop.io.compress.zlib.BuiltInZlibDeflater;
import org.apache.hadoop.io.compress.zlib.BuiltInZlibInflater;
import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionLevel;
import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionStrategy;
import org.apache.hadoop.io.compress.zlib.ZlibFactory;
@ -418,4 +432,154 @@ public static void main(String[] args) {
}
@Test
public void testCodecPoolAndGzipDecompressor() {
// BuiltInZlibInflater should not be used as the GzipCodec decompressor.
// Assert that this is the case.
// Don't use native libs for this test.
Configuration conf = new Configuration();
conf.setBoolean("hadoop.native.lib", false);
assertFalse("ZlibFactory is using native libs against request",
ZlibFactory.isNativeZlibLoaded(conf));
// This should give us a BuiltInZlibInflater.
Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
assertNotNull("zlibDecompressor is null!", zlibDecompressor);
assertTrue("ZlibFactory returned unexpected inflator",
zlibDecompressor instanceof BuiltInZlibInflater);
// Asking for a decompressor directly from GzipCodec should return null;
// its createOutputStream() just wraps the existing stream in a
// java.util.zip.GZIPOutputStream.
CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
CompressionCodec codec = ccf.getCodec(new Path("foo.gz"));
assertTrue("Codec for .gz file is not GzipCodec", codec instanceof GzipCodec);
Decompressor codecDecompressor = codec.createDecompressor();
if (null != codecDecompressor) {
fail("Got non-null codecDecompressor: " + codecDecompressor);
}
// Asking the CodecPool for a decompressor for GzipCodec
// should return null as well.
Decompressor poolDecompressor = CodecPool.getDecompressor(codec);
if (null != poolDecompressor) {
fail("Got non-null poolDecompressor: " + poolDecompressor);
}
// If we then ensure that the pool is populated...
CodecPool.returnDecompressor(zlibDecompressor);
// Asking the pool another time should still not bind this to GzipCodec.
poolDecompressor = CodecPool.getDecompressor(codec);
if (null != poolDecompressor) {
fail("Second time, got non-null poolDecompressor: "
+ poolDecompressor);
}
}
@Test
public void testGzipCodecRead() throws IOException {
// Create a gzipped file and try to read it back, using a decompressor
// from the CodecPool.
// Don't use native libs for this test.
Configuration conf = new Configuration();
conf.setBoolean("hadoop.native.lib", false);
assertFalse("ZlibFactory is using native libs against request",
ZlibFactory.isNativeZlibLoaded(conf));
// Ensure that the CodecPool has a BuiltInZlibInflater in it.
Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
assertNotNull("zlibDecompressor is null!", zlibDecompressor);
assertTrue("ZlibFactory returned unexpected inflator",
zlibDecompressor instanceof BuiltInZlibInflater);
CodecPool.returnDecompressor(zlibDecompressor);
// Now create a GZip text file.
String tmpDir = System.getProperty("test.build.data", "/tmp/");
Path f = new Path(new Path(tmpDir), "testGzipCodecRead.txt.gz");
BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
new GZIPOutputStream(new FileOutputStream(f.toString()))));
final String msg = "This is the message in the file!";
bw.write(msg);
bw.close();
// Now read it back, using the CodecPool to establish the
// decompressor to use.
CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
CompressionCodec codec = ccf.getCodec(f);
Decompressor decompressor = CodecPool.getDecompressor(codec);
FileSystem fs = FileSystem.getLocal(conf);
InputStream is = fs.open(f);
is = codec.createInputStream(is, decompressor);
BufferedReader br = new BufferedReader(new InputStreamReader(is));
String line = br.readLine();
assertEquals("Didn't get the same message back!", msg, line);
br.close();
}
private void verifyGzipFile(String filename, String msg) throws IOException {
BufferedReader r = new BufferedReader(new InputStreamReader(
new GZIPInputStream(new FileInputStream(filename))));
try {
String line = r.readLine();
assertEquals("Got invalid line back from " + filename, msg, line);
} finally {
r.close();
new File(filename).delete();
}
}
@Test
public void testGzipCodecWrite() throws IOException {
// Create a gzipped file using a compressor from the CodecPool,
// and try to read it back via the regular GZIPInputStream.
// Don't use native libs for this test.
Configuration conf = new Configuration();
conf.setBoolean("hadoop.native.lib", false);
assertFalse("ZlibFactory is using native libs against request",
ZlibFactory.isNativeZlibLoaded(conf));
// Ensure that the CodecPool has a BuiltInZlibDeflater in it.
Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf);
assertNotNull("zlibCompressor is null!", zlibCompressor);
assertTrue("ZlibFactory returned unexpected deflator",
zlibCompressor instanceof BuiltInZlibDeflater);
CodecPool.returnCompressor(zlibCompressor);
// Create a GZIP text file via the Compressor interface.
CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
CompressionCodec codec = ccf.getCodec(new Path("foo.gz"));
assertTrue("Codec for .gz file is not GzipCodec", codec instanceof GzipCodec);
final String msg = "This is the message we are going to compress.";
final String tmpDir = System.getProperty("test.build.data", "/tmp/");
final String fileName = new Path(new Path(tmpDir),
"testGzipCodecWrite.txt.gz").toString();
BufferedWriter w = null;
Compressor gzipCompressor = CodecPool.getCompressor(codec);
if (null != gzipCompressor) {
// If it gives us back a Compressor, we should be able to use this
// to write files we can then read back with Java's gzip tools.
OutputStream os = new CompressorStream(new FileOutputStream(fileName),
gzipCompressor);
w = new BufferedWriter(new OutputStreamWriter(os));
w.write(msg);
w.close();
CodecPool.returnCompressor(gzipCompressor);
verifyGzipFile(fileName, msg);
}
// Create a gzip text file via codec.getOutputStream().
w = new BufferedWriter(new OutputStreamWriter(
codec.createOutputStream(new FileOutputStream(fileName))));
w.write(msg);
w.close();
verifyGzipFile(fileName, msg);
}
}