From caf9fe2f1370c0423ade23e21c05e1e466fbc7a3 Mon Sep 17 00:00:00 2001
From: Harsh J
Date: Tue, 10 Jul 2012 19:58:49 +0000
Subject: [PATCH] HADOOP-8423. MapFile.Reader.get() crashes jvm or throws
 EOFException on Snappy or LZO block-compressed data. Contributed by Todd
 Lipcon. (harsh)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1359866 13f79535-47bb-0310-9956-ffa450edef68
---
 .../hadoop-common/CHANGES.txt                 |  4 ++
 .../io/compress/BlockDecompressorStream.java  |  2 +
 .../apache/hadoop/io/compress/TestCodec.java  | 46 +++++++++++++++++++
 3 files changed, 52 insertions(+)

diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index ab380827be..7ddb7ef21c 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -242,6 +242,10 @@ Branch-2 ( Unreleased changes )
     HADOOP-8533. Remove parallel call ununsed capability in RPC.
     (Brandon Li via suresh)
 
+    HADOOP-8423. MapFile.Reader.get() crashes jvm or throws
+    EOFException on Snappy or LZO block-compressed data
+    (todd via harsh)
+
   BUG FIXES
 
   HADOOP-8372. NetUtils.normalizeHostName() incorrectly handles hostname
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BlockDecompressorStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BlockDecompressorStream.java
index d4765731a2..42ade89019 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BlockDecompressorStream.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BlockDecompressorStream.java
@@ -127,6 +127,8 @@ protected int getCompressedData() throws IOException {
   }
 
   public void resetState() throws IOException {
+    originalBlockSize = 0;
+    noUncompressedBytes = 0;
     super.resetState();
   }
 
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodec.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodec.java
index 67254fe758..1d0801c02e 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodec.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodec.java
@@ -46,6 +46,7 @@
 import org.apache.hadoop.io.DataInputBuffer;
 import org.apache.hadoop.io.DataOutputBuffer;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.MapFile;
 import org.apache.hadoop.io.RandomDatum;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
@@ -68,6 +69,7 @@
 import org.apache.commons.logging.LogFactory;
 
 import org.junit.Assert;
+import org.junit.Assume;
 import org.junit.Test;
 
 import static org.junit.Assert.*;
@@ -514,6 +516,50 @@ private static void sequenceFileCodecTest(Configuration conf, int lines,
     LOG.info("SUCCESS! Completed SequenceFileCodecTest with codec \"" + codecClass + "\"");
   }
 
+  /**
+   * Regression test for HADOOP-8423: seeking in a block-compressed
+   * stream would not properly reset the block decompressor state.
+   */
+  @Test
+  public void testSnappyMapFile() throws Exception {
+    Assume.assumeTrue(SnappyCodec.isNativeCodeLoaded());
+    codecTestMapFile(SnappyCodec.class, CompressionType.BLOCK, 100);
+  }
+
+  private void codecTestMapFile(Class<? extends CompressionCodec> clazz,
+      CompressionType type, int records) throws Exception {
+
+    FileSystem fs = FileSystem.get(conf);
+    LOG.info("Creating MapFiles with " + records +
+        " records using codec " + clazz.getSimpleName());
+    Path path = new Path(new Path(
+        System.getProperty("test.build.data", "/tmp")),
+        clazz.getSimpleName() + "-" + type + "-" + records);
+
+    LOG.info("Writing " + path);
+    createMapFile(conf, fs, path, clazz.newInstance(), type, records);
+    MapFile.Reader reader = new MapFile.Reader(path, conf);
+    Text key1 = new Text("002");
+    assertNotNull(reader.get(key1, new Text()));
+    Text key2 = new Text("004");
+    assertNotNull(reader.get(key2, new Text()));
+  }
+
+  private static void createMapFile(Configuration conf, FileSystem fs, Path path,
+      CompressionCodec codec, CompressionType type, int records) throws IOException {
+    MapFile.Writer writer =
+        new MapFile.Writer(conf, path,
+            MapFile.Writer.keyClass(Text.class),
+            MapFile.Writer.valueClass(Text.class),
+            MapFile.Writer.compression(type, codec));
+    Text key = new Text();
+    for (int j = 0; j < records; j++) {
+      key.set(String.format("%03d", j));
+      writer.append(key, key);
+    }
+    writer.close();
+  }
+
   public static void main(String[] args) throws IOException {
     int count = 10000;
     String codecClass = "org.apache.hadoop.io.compress.DefaultCodec";
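
Note on why the two-line fix works: the counters zeroed in resetState() track how far the stream has read into the current compressed block (originalBlockSize is the uncompressed length of the block being consumed, noUncompressedBytes is how much of it has been returned so far). MapFile.Reader.get() seeks the underlying SequenceFile, which calls resetState(); with stale counters the stream believed it was still mid-block after the seek, read a block-length header from the wrong offset, and handed a garbage size to the native Snappy/LZO decompressor, hence the EOFException or JVM crash. Below is a minimal, self-contained sketch of that bookkeeping, not Hadoop source: the field names mirror BlockDecompressorStream, but the class name and the main() driver are invented for illustration.

// Minimal model of BlockDecompressorStream's per-block bookkeeping.
// Field names mirror the patched class; everything else is illustrative.
public class ResetStateSketch {
  private int originalBlockSize = 0;   // uncompressed size of current block
  private int noUncompressedBytes = 0; // bytes of it already handed out

  // The real decompress() reads the next 4-byte block-length header only
  // when this is true; otherwise it keeps draining the "current" block.
  boolean atBlockBoundary() {
    return noUncompressedBytes == originalBlockSize;
  }

  // The fix: after a seek, force the next read to start at a block header.
  // Before HADOOP-8423 this reset only the codec, not these counters.
  void resetState() {
    originalBlockSize = 0;
    noUncompressedBytes = 0;
  }

  public static void main(String[] args) {
    ResetStateSketch s = new ResetStateSketch();
    // Pretend we are 100 bytes into a 4096-byte block when the reader seeks.
    s.originalBlockSize = 4096;
    s.noUncompressedBytes = 100;
    System.out.println("boundary before reset: " + s.atBlockBoundary()); // false
    s.resetState();
    System.out.println("boundary after reset:  " + s.atBlockBoundary()); // true
  }
}

Zeroing both counters is safe because BlockCompressorStream writes an uncompressed-length header at the start of every block, so a freshly positioned stream can always resynchronize at the next header.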