From 4c66882a694869352a7db04111da4e55046791a9 Mon Sep 17 00:00:00 2001 From: Devaraj Das Date: Mon, 31 Aug 2009 07:56:39 +0000 Subject: [PATCH] HADOOP-6224. Adds methods to read strings safely, makes the Buffer class in DataOutputBuffer public, and introduces public constructors there. These changes are required for MAPREDUCE-318. Contributed by Jothi Padmanabhan and Arun Murthy. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@809491 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 4 +++ .../apache/hadoop/io/DataOutputBuffer.java | 12 ++++++++- src/java/org/apache/hadoop/io/Text.java | 25 +++++++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index 58dd9246c9..3a206a5fdf 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -507,6 +507,10 @@ Trunk (unreleased changes) HADOOP-6184. Provide an API to dump Configuration in a JSON format. (V.V.Chaitanya Krishna via yhemanth) + + HADOOP-6224. Adds methods to read strings safely, makes the Buffer class + in DataOutputBuffer public, and introduces public constructors there. These changes + are required for MAPREDUCE-318. (Jothi Padmanabhan and Arun Murthy via ddas) OPTIMIZATIONS diff --git a/src/java/org/apache/hadoop/io/DataOutputBuffer.java b/src/java/org/apache/hadoop/io/DataOutputBuffer.java index a7ad89839e..f6dadaf858 100644 --- a/src/java/org/apache/hadoop/io/DataOutputBuffer.java +++ b/src/java/org/apache/hadoop/io/DataOutputBuffer.java @@ -20,6 +20,8 @@ import java.io.*; +import org.apache.hadoop.io.DataOutputBuffer.Buffer; + /** A reusable {@link DataOutput} implementation that writes to an in-memory * buffer. 
* @@ -41,7 +43,7 @@ */ public class DataOutputBuffer extends DataOutputStream { - private static class Buffer extends ByteArrayOutputStream { + public static class Buffer extends ByteArrayOutputStream { public byte[] getData() { return buf; } public int getLength() { return count; } @@ -53,6 +55,10 @@ public Buffer(int size) { super(size); } + public Buffer(byte[] buf) { + super.buf = buf; + } + public void write(DataInput in, int len) throws IOException { int newcount = count + len; if (newcount > buf.length) { @@ -76,6 +82,10 @@ public DataOutputBuffer(int size) { this(new Buffer(size)); } + public DataOutputBuffer(byte[] buf) { + this(new Buffer(buf)); + } + private DataOutputBuffer(Buffer buffer) { super(buffer); this.buffer = buffer; diff --git a/src/java/org/apache/hadoop/io/Text.java b/src/java/org/apache/hadoop/io/Text.java index 2110a34f49..b267962eb6 100644 --- a/src/java/org/apache/hadoop/io/Text.java +++ b/src/java/org/apache/hadoop/io/Text.java @@ -35,6 +35,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.io.WritableUtils; /** This class stores text using standard UTF8 encoding. It provides methods * to serialize, deserialize, and compare texts at byte level. The type of @@ -403,6 +404,30 @@ public static String readString(DataInput in) throws IOException { in.readFully(bytes, 0, length); return decode(bytes); } + /** + * Read a string, but check it for sanity. The format consists of a vint + * followed by the given number of bytes. + * @param in the stream to read from + * @param maxLength the largest acceptable length of string + * @return the bytes as a string + * @throws IOException if reading from the DataInput fails + * @throws IllegalArgumentException if the string length is negative or + * larger than maxLength. Only the vint is read. 
+ */ + public static String readStringSafely(DataInput in, + int maxLength + ) throws IOException, + IllegalArgumentException { + int length = WritableUtils.readVInt(in); + if (length < 0 || length > maxLength) { + throw new IllegalArgumentException("String size was " + length + + ", which is outside of 0.." + + maxLength); + } + byte [] bytes = new byte[length]; + in.readFully(bytes, 0, length); + return decode(bytes); + } /** Write a UTF8 encoded string to out */