From 6681523c870541390864e021cbe1908b6797f622 Mon Sep 17 00:00:00 2001
From: Todd Lipcon
Date: Fri, 7 Dec 2012 08:18:14 +0000
Subject: [PATCH] HDFS-4282. TestEditLog.testFuzzSequences FAILED in all pre-commit test. Contributed by Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1418214 13f79535-47bb-0310-9956-ffa450edef68
---
Review note: the three "Truncated UTF8" messages pass nBytes as the third
argument of StringUtils.byteToHexString. That argument is used as an exclusive
end index by the "Invalid UTF8" message in the same method (endForError), so
passing a byte count (1, 2, 3) would print an empty or shortened hex dump
whenever the truncated sequence does not start at offset 0 of the buffer.

 .../org/apache/hadoop/io/SequenceFile.java    |  4 +--
 .../main/java/org/apache/hadoop/io/UTF8.java  | 34 +++++++++++++++++--
 .../java/org/apache/hadoop/io/TestUTF8.java   | 27 ++++++++++++---
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt   |  3 ++
 .../server/namenode/FSImageSerialization.java |  2 +-
 5 files changed, 60 insertions(+), 10 deletions(-)

diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java
index 8a14860773..2d42a93997 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java
@@ -1858,10 +1858,10 @@ private void init(boolean tempReader) throws IOException {
         UTF8 className = new UTF8();
 
         className.readFields(in);
-        keyClassName = className.toString(); // key class name
+        keyClassName = className.toStringChecked(); // key class name
 
         className.readFields(in);
-        valClassName = className.toString(); // val class name
+        valClassName = className.toStringChecked(); // val class name
       } else {
         keyClassName = Text.readString(in);
         valClassName = Text.readString(in);
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/UTF8.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/UTF8.java
index 4124949a4f..89f1e428bb 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/UTF8.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/UTF8.java
@@ -21,6 +21,7 @@
 import java.io.IOException;
 import java.io.DataInput;
 import java.io.DataOutput;
+import java.io.UTFDataFormatException;
 
 import org.apache.hadoop.util.StringUtils;
 
@@ -155,6 +156,21 @@ public String toString() {
     }
     return buffer.toString();
   }
+
+  /**
+   * Convert to a string, checking for valid UTF8.
+   * @return the converted string
+   * @throws UTFDataFormatException if the underlying bytes contain invalid
+   * UTF8 data.
+   */
+  public String toStringChecked() throws IOException {
+    StringBuilder buffer = new StringBuilder(length);
+    synchronized (IBUF) {
+      IBUF.reset(bytes, length);
+      readChars(IBUF, buffer, length);
+    }
+    return buffer.toString();
+  }
 
   /** Returns true iff o is a UTF8 with the same contents.  */
   @Override
@@ -238,7 +254,7 @@ public static String readString(DataInput in) throws IOException {
   }
 
   private static void readChars(DataInput in, StringBuilder buffer, int nBytes)
-    throws IOException {
+    throws UTFDataFormatException, IOException {
     DataOutputBuffer obuf = OBUF_FACTORY.get();
     obuf.reset();
     obuf.write(in, nBytes);
@@ -250,15 +266,27 @@ private static void readChars(DataInput in, StringBuilder buffer, int nBytes)
         // 0b0xxxxxxx: 1-byte sequence
         buffer.append((char)(b & 0x7F));
       } else if ((b & 0xE0) == 0xC0) {
+        if (i >= nBytes) {
+          throw new UTFDataFormatException("Truncated UTF8 at " +
+              StringUtils.byteToHexString(bytes, i - 1, nBytes));
+        }
         // 0b110xxxxx: 2-byte sequence
         buffer.append((char)(((b & 0x1F) << 6)
             | (bytes[i++] & 0x3F)));
       } else if ((b & 0xF0) == 0xE0) {
         // 0b1110xxxx: 3-byte sequence
+        if (i + 1 >= nBytes) {
+          throw new UTFDataFormatException("Truncated UTF8 at " +
+              StringUtils.byteToHexString(bytes, i - 1, nBytes));
+        }
         buffer.append((char)(((b & 0x0F) << 12)
             | ((bytes[i++] & 0x3F) << 6)
             |  (bytes[i++] & 0x3F)));
       } else if ((b & 0xF8) == 0xF0) {
+        if (i + 2 >= nBytes) {
+          throw new UTFDataFormatException("Truncated UTF8 at " +
+              StringUtils.byteToHexString(bytes, i - 1, nBytes));
+        }
         // 0b11110xxx: 4-byte sequence
         int codepoint =
             ((b & 0x07) << 18)
@@ -274,8 +302,8 @@ private static void readChars(DataInput in, StringBuilder buffer, int nBytes)
         // Only show the next 6 bytes max in the error code - in case the
         // buffer is large, this will prevent an exceedingly large message.
         int endForError = Math.min(i + 5, nBytes);
-        throw new IOException("Invalid UTF8 at " +
-          StringUtils.byteToHexString(bytes, i - 1, endForError));
+        throw new UTFDataFormatException("Invalid UTF8 at " +
+            StringUtils.byteToHexString(bytes, i - 1, endForError));
       }
     }
   }
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestUTF8.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestUTF8.java
index 902f215d06..b387224832 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestUTF8.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestUTF8.java
@@ -20,6 +20,7 @@
 import junit.framework.TestCase;
 
 import java.io.IOException;
+import java.io.UTFDataFormatException;
 import java.util.Random;
 
 import org.apache.hadoop.test.GenericTestUtils;
@@ -126,9 +127,9 @@ public void testInvalidUTF8() throws Exception {
     try {
       UTF8.fromBytes(invalid);
       fail("did not throw an exception");
-    } catch (IOException ioe) {
+    } catch (UTFDataFormatException utfde) {
       GenericTestUtils.assertExceptionContains(
-          "Invalid UTF8 at ffff01020304", ioe);
+          "Invalid UTF8 at ffff01020304", utfde);
     }
   }
 
@@ -142,9 +143,27 @@ public void test5ByteUtf8Sequence() throws Exception {
     try {
       UTF8.fromBytes(invalid);
       fail("did not throw an exception");
-    } catch (IOException ioe) {
+    } catch (UTFDataFormatException utfde) {
       GenericTestUtils.assertExceptionContains(
-          "Invalid UTF8 at f88880808004", ioe);
+          "Invalid UTF8 at f88880808004", utfde);
+    }
+  }
+
+  /**
+   * Test that decoding invalid UTF8 due to truncation yields the correct
+   * exception type.
+   */
+  public void testInvalidUTF8Truncated() throws Exception {
+    // Truncated CAT FACE character -- this is a 4-byte sequence, but we
+    // only have the first three bytes.
+    byte[] truncated = new byte[] {
+        (byte)0xF0, (byte)0x9F, (byte)0x90 };
+    try {
+      UTF8.fromBytes(truncated);
+      fail("did not throw an exception");
+    } catch (UTFDataFormatException utfde) {
+      GenericTestUtils.assertExceptionContains(
+          "Truncated UTF8 at f09f90", utfde);
     }
   }
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 0c346c7b28..bb3099d955 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -569,6 +569,9 @@ Release 2.0.3-alpha - Unreleased
     HDFS-4238. Standby namenode should not do purging of shared
     storage edits. (todd)
 
+    HDFS-4282. TestEditLog.testFuzzSequences FAILED in all pre-commit test
+    (todd)
+
   BREAKDOWN OF HDFS-3077 SUBTASKS
 
     HDFS-3077. Quorum-based protocol for reading and writing edit logs.
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java
index 7eda24948a..5649833db7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java
@@ -197,7 +197,7 @@ static void saveINode2Image(INode node,
   public static String readString(DataInputStream in) throws IOException {
     DeprecatedUTF8 ustr = TL_DATA.get().U_STR;
     ustr.readFields(in);
-    return ustr.toString();
+    return ustr.toStringChecked();
   }
 
   static String readString_EmptyAsNull(DataInputStream in) throws IOException {