From b5ca649bff01c906033d71c9f983b4cdaa71a9d1 Mon Sep 17 00:00:00 2001 From: Tsuyoshi Ozawa Date: Thu, 22 Oct 2015 11:55:25 +0900 Subject: [PATCH] HADOOP-9692. SequenceFile reader throws EOFException on zero-length file. Contributed by Zhe Zhang and Chu Tong. --- .../hadoop-common/CHANGES.txt | 3 +++ .../org/apache/hadoop/io/SequenceFile.java | 15 ++++++++++--- .../apache/hadoop/io/TestSequenceFile.java | 21 +++++++++++++++++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 7edf5cdd52..5acf369ff3 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -1251,6 +1251,9 @@ Release 2.8.0 - UNRELEASED HADOOP-12483. Maintain wrapped SASL ordering for postponed IPC responses. (Daryn Sharp via yliu) + HADOOP-9692. SequenceFile reader throws EOFException on zero-length file. + (Zhe Zhang and Chu Tong via ozawa) + OPTIMIZATIONS HADOOP-12051. ProtobufRpcEngine.invoke() should use Exception.toString() diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java index e37e855ed2..ed57eee395 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java @@ -1912,17 +1912,26 @@ protected FSDataInputStream openFile(FileSystem fs, Path file, */ private void init(boolean tempReader) throws IOException { byte[] versionBlock = new byte[VERSION.length]; - in.readFully(versionBlock); + String exceptionMsg = this + " not a SequenceFile"; + + // Try to read sequence file header. 
+ try { + in.readFully(versionBlock); + } catch (EOFException e) { + throw new EOFException(exceptionMsg); + } if ((versionBlock[0] != VERSION[0]) || (versionBlock[1] != VERSION[1]) || - (versionBlock[2] != VERSION[2])) + (versionBlock[2] != VERSION[2])) { throw new IOException(this + " not a SequenceFile"); + } // Set 'version' version = versionBlock[3]; - if (version > VERSION[3]) + if (version > VERSION[3]) { throw new VersionMismatchException(VERSION[3], version); + } if (version < BLOCK_COMPRESS_VERSION) { UTF8 className = new UTF8(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java index 7495c6ed66..99c97db548 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java @@ -522,6 +522,27 @@ protected FSDataInputStream openFile(FileSystem fs, Path file, int bufferSize, l assertTrue("InputStream for " + path + " should have been closed.", openedFile[0].isClosed()); } + /** + * Test that makes sure a zero-length sequence file is handled properly while + * initializing. + */ + public void testInitZeroLengthSequenceFile() throws IOException { + Configuration conf = new Configuration(); + LocalFileSystem fs = FileSystem.getLocal(conf); + + // create an empty file (which is not a valid sequence file) + Path path = new Path(System.getProperty("test.build.data", ".") + + "/zerolength.seq"); + fs.create(path).close(); + + try { + new SequenceFile.Reader(conf, SequenceFile.Reader.file(path)); + fail("IOException expected."); + } catch (IOException expected) { + assertTrue(expected instanceof EOFException); + } + } + /** * Test that makes sure createWriter succeeds on a file that was * already created