diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/BytesWritable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/BytesWritable.java index 7d7b75ba05..a81bc24e89 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/BytesWritable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/BytesWritable.java @@ -19,6 +19,9 @@ package org.apache.hadoop.io; import java.io.IOException; +import java.util.Arrays; +import java.util.stream.Collectors; +import java.util.stream.IntStream; import java.io.DataInput; import java.io.DataOutput; @@ -36,16 +39,20 @@ public class BytesWritable extends BinaryComparable implements WritableComparable { private static final int LENGTH_BYTES = 4; - private static final byte[] EMPTY_BYTES = {}; + + private static final byte[] EMPTY_BYTES = new byte[0]; private int size; private byte[] bytes; - + /** * Create a zero-size sequence. */ - public BytesWritable() {this(EMPTY_BYTES);} - + public BytesWritable() { + this.bytes = EMPTY_BYTES; + this.size = 0; + } + /** * Create a BytesWritable using the byte array as the initial value. * @param bytes This array becomes the backing storage for the object. @@ -65,17 +72,15 @@ public BytesWritable(byte[] bytes, int length) { this.bytes = bytes; this.size = length; } - + /** * Get a copy of the bytes that is exactly the length of the data. * See {@link #getBytes()} for faster access to the underlying array. */ public byte[] copyBytes() { - byte[] result = new byte[size]; - System.arraycopy(bytes, 0, result, 0, size); - return result; + return Arrays.copyOf(bytes, size); } - + /** * Get the data backing the BytesWritable. Please use {@link #copyBytes()} * if you need the returned array to be precisely the length of the data. @@ -111,7 +116,7 @@ public int getLength() { public int getSize() { return getLength(); } - + /** * Change the size of the buffer. The values in the old range are preserved * and any new values are undefined. The capacity is changed if it is @@ -126,36 +131,32 @@ public void setSize(int size) { } this.size = size; } - + /** * Get the capacity, which is the maximum size that could handled without * resizing the backing storage. + * * @return The number of bytes */ public int getCapacity() { return bytes.length; } - + /** - * Change the capacity of the backing storage. - * The data is preserved. - * @param new_cap The new capacity in bytes. + * Change the capacity of the backing storage. The data is preserved. + * + * @param capacity The new capacity in bytes. */ - public void setCapacity(int new_cap) { - if (new_cap != getCapacity()) { - byte[] new_data = new byte[new_cap]; - if (new_cap < size) { - size = new_cap; - } - if (size != 0) { - System.arraycopy(bytes, 0, new_data, 0, size); - } - bytes = new_data; + public void setCapacity(final int capacity) { + if (capacity != getCapacity()) { + this.size = Math.min(size, capacity); + this.bytes = Arrays.copyOf(this.bytes, capacity); } } /** * Set the BytesWritable to the contents of the given newData. + * * @param newData the value to set this BytesWritable to. */ public void set(BytesWritable newData) { @@ -163,7 +164,8 @@ public void set(BytesWritable newData) { } /** - * Set the value to a copy of the given byte range + * Set the value to a copy of the given byte range. + * * @param newData the new values to copy in * @param offset the offset in newData to start at * @param length the number of bytes to copy @@ -174,25 +176,18 @@ public void set(byte[] newData, int offset, int length) { System.arraycopy(newData, offset, bytes, 0, size); } - // inherit javadoc @Override public void readFields(DataInput in) throws IOException { setSize(0); // clear the old data setSize(in.readInt()); in.readFully(bytes, 0, size); } - - // inherit javadoc + @Override public void write(DataOutput out) throws IOException { out.writeInt(size); out.write(bytes, 0, size); } - - @Override - public int hashCode() { - return super.hashCode(); - } /** * Are the two byte sequences equal? @@ -204,25 +199,19 @@ public boolean equals(Object right_obj) { return false; } + @Override + public int hashCode() { + return super.hashCode(); + } + /** * Generate the stream of bytes as hex pairs separated by ' '. */ @Override - public String toString() { - StringBuilder sb = new StringBuilder(3*size); - for (int idx = 0; idx < size; idx++) { - // if not the first, put a blank separator in - if (idx != 0) { - sb.append(' '); - } - String num = Integer.toHexString(0xff & bytes[idx]); - // if it is only one digit, add a leading 0. - if (num.length() < 2) { - sb.append('0'); - } - sb.append(num); - } - return sb.toString(); + public String toString() { + return IntStream.range(0, size) + .mapToObj(idx -> String.format("%02x", bytes[idx])) + .collect(Collectors.joining(" ")); } /** A Comparator optimized for BytesWritable. */ @@ -230,20 +219,20 @@ public static class Comparator extends WritableComparator { public Comparator() { super(BytesWritable.class); } - + /** * Compare the buffers in serialized form. */ @Override public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { - return compareBytes(b1, s1+LENGTH_BYTES, l1-LENGTH_BYTES, - b2, s2+LENGTH_BYTES, l2-LENGTH_BYTES); + return compareBytes(b1, s1 + LENGTH_BYTES, l1 - LENGTH_BYTES, + b2, s2 + LENGTH_BYTES, l2 - LENGTH_BYTES); } } - + static { // register this comparator WritableComparator.define(BytesWritable.class, new Comparator()); } - + } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java index 3ab327fe76..716de3deb4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java @@ -24,11 +24,11 @@ import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.CharacterCodingException; -import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetEncoder; import java.nio.charset.CodingErrorAction; import java.nio.charset.MalformedInputException; +import java.nio.charset.StandardCharsets; import java.text.CharacterIterator; import java.text.StringCharacterIterator; import java.util.Arrays; @@ -52,63 +52,67 @@ @InterfaceStability.Stable public class Text extends BinaryComparable implements WritableComparable { - + private static final ThreadLocal ENCODER_FACTORY = new ThreadLocal() { @Override protected CharsetEncoder initialValue() { - return Charset.forName("UTF-8").newEncoder(). + return StandardCharsets.UTF_8.newEncoder(). onMalformedInput(CodingErrorAction.REPORT). onUnmappableCharacter(CodingErrorAction.REPORT); } }; - + private static final ThreadLocal DECODER_FACTORY = new ThreadLocal() { @Override protected CharsetDecoder initialValue() { - return Charset.forName("UTF-8").newDecoder(). + return StandardCharsets.UTF_8.newDecoder(). onMalformedInput(CodingErrorAction.REPORT). onUnmappableCharacter(CodingErrorAction.REPORT); } }; - - private static final byte [] EMPTY_BYTES = new byte[0]; - - private byte[] bytes; - private int length; + private static final byte[] EMPTY_BYTES = new byte[0]; + + private byte[] bytes = EMPTY_BYTES; + private int length = 0; + + /** + * Construct an empty text string. + */ public Text() { - bytes = EMPTY_BYTES; } - /** Construct from a string. + /** + * Construct from a string. */ public Text(String string) { set(string); } - /** Construct from another text. */ + /** + * Construct from another text. + */ public Text(Text utf8) { set(utf8); } - /** Construct from a byte array. + /** + * Construct from a byte array. */ public Text(byte[] utf8) { set(utf8); } - + /** * Get a copy of the bytes that is exactly the length of the data. * See {@link #getBytes()} for faster access to the underlying array. */ public byte[] copyBytes() { - byte[] result = new byte[length]; - System.arraycopy(bytes, 0, result, 0, length); - return result; + return Arrays.copyOf(bytes, length); } - + /** * Returns the raw bytes; however, only data up to {@link #getLength()} is * valid. Please use {@link #copyBytes()} if you @@ -119,12 +123,14 @@ public byte[] getBytes() { return bytes; } - /** Returns the number of bytes in the byte array */ + /** + * Returns the number of bytes in the byte array. + */ @Override public int getLength() { return length; } - + /** * Returns the Unicode Scalar Value (32-bit integer value) * for the character at position. Note that this @@ -136,15 +142,15 @@ public int getLength() { public int charAt(int position) { if (position > this.length) return -1; // too long if (position < 0) return -1; // duh. - + ByteBuffer bb = (ByteBuffer)ByteBuffer.wrap(bytes).position(position); return bytesToCodePoint(bb.slice()); } - + public int find(String what) { return find(what, 0); } - + /** * Finds any occurrence of what in the backing * buffer, starting as position start. The starting @@ -156,11 +162,11 @@ public int find(String what) { */ public int find(String what, int start) { try { - ByteBuffer src = ByteBuffer.wrap(this.bytes,0,this.length); + ByteBuffer src = ByteBuffer.wrap(this.bytes, 0, this.length); ByteBuffer tgt = encode(what); byte b = tgt.get(); src.position(start); - + while (src.hasRemaining()) { if (b == src.get()) { // matching first byte src.mark(); // save position in loop @@ -186,54 +192,63 @@ public int find(String what, int start) { } return -1; // not found } catch (CharacterCodingException e) { - // can't get here - e.printStackTrace(); - return -1; + throw new RuntimeException("Should not have happened", e); } - } - /** Set to contain the contents of a string. + } + + /** + * Set to contain the contents of a string. */ public void set(String string) { try { ByteBuffer bb = encode(string, true); bytes = bb.array(); length = bb.limit(); - }catch(CharacterCodingException e) { - throw new RuntimeException("Should not have happened ", e); + } catch (CharacterCodingException e) { + throw new RuntimeException("Should not have happened", e); } } - /** Set to a utf8 byte array + /** + * Set to a utf8 byte array. */ public void set(byte[] utf8) { set(utf8, 0, utf8.length); } - - /** copy a text. */ + + /** + * Copy a text. + */ public void set(Text other) { set(other.getBytes(), 0, other.getLength()); } /** - * Set the Text to range of bytes + * Set the Text to range of bytes. + * * @param utf8 the data to copy from * @param start the first position of the new string * @param len the number of bytes of the new string */ public void set(byte[] utf8, int start, int len) { - setCapacity(len, false); + ensureCapacity(len); System.arraycopy(utf8, start, bytes, 0, len); this.length = len; } /** - * Append a range of bytes to the end of the given text + * Append a range of bytes to the end of the given text. + * * @param utf8 the data to copy from * @param start the first position to append from utf8 * @param len the number of bytes to append */ public void append(byte[] utf8, int start, int len) { - setCapacity(length + len, true); + byte[] original = bytes; + int capacity = Math.max(length + len, length + (length >> 1)); + if (ensureCapacity(capacity)) { + System.arraycopy(original, 0, bytes, 0, length); + } System.arraycopy(utf8, start, bytes, length, len); length += len; } @@ -250,47 +265,39 @@ public void clear() { length = 0; } - /* + /** * Sets the capacity of this Text object to at least - * len bytes. If the current buffer is longer, - * then the capacity and existing content of the buffer are - * unchanged. If len is larger - * than the current capacity, the Text object's capacity is - * increased to match. - * @param len the number of bytes we need - * @param keepData should the old data be kept + * capacity bytes. If the current buffer is longer, then the + * capacity and existing content of the buffer are unchanged. If + * capacity is larger than the current capacity, the Text + * object's capacity is increased to match and any existing data is lost. + * + * @param capacity the number of bytes we need + * @return true if the internal array was resized or false otherwise */ - private void setCapacity(int len, boolean keepData) { - if (bytes == null || bytes.length < len) { - if (bytes != null && keepData) { - bytes = Arrays.copyOf(bytes, Math.max(len,length << 1)); - } else { - bytes = new byte[len]; - } + private boolean ensureCapacity(final int capacity) { + if (bytes.length < capacity) { + bytes = new byte[capacity]; + return true; } + return false; } - - /** - * Convert text back to string - * @see java.lang.Object#toString() - */ + @Override public String toString() { try { return decode(bytes, 0, length); } catch (CharacterCodingException e) { - throw new RuntimeException("Should not have happened " , e); + throw new RuntimeException("Should not have happened", e); } } - - /** deserialize - */ + @Override public void readFields(DataInput in) throws IOException { int newLength = WritableUtils.readVInt(in); readWithKnownLength(in, newLength); } - + public void readFields(DataInput in, int maxLength) throws IOException { int newLength = WritableUtils.readVInt(in); if (newLength < 0) { @@ -303,7 +310,9 @@ public void readFields(DataInput in, int maxLength) throws IOException { readWithKnownLength(in, newLength); } - /** Skips over one Text in the input. */ + /** + * Skips over one Text in the input. + */ public static void skip(DataInput in) throws IOException { int length = WritableUtils.readVInt(in); WritableUtils.skipFully(in, length); @@ -315,14 +324,14 @@ public static void skip(DataInput in) throws IOException { * format. */ public void readWithKnownLength(DataInput in, int len) throws IOException { - setCapacity(len, false); + ensureCapacity(len); in.readFully(bytes, 0, len); length = len; } - /** serialize - * write this object to out - * length uses zero-compressed encoding + /** + * Serialize. Write this object to out length uses zero-compressed encoding. + * * @see Writable#write(DataOutput) */ @Override @@ -341,7 +350,10 @@ public void write(DataOutput out, int maxLength) throws IOException { out.write(bytes, 0, length); } - /** Returns true iff o is a Text with the same contents. */ + /** + * Returns true iff o is a Text with the same length and same + * contents. + */ @Override public boolean equals(Object o) { if (o instanceof Text) @@ -365,7 +377,7 @@ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { int n1 = WritableUtils.decodeVIntSize(b1[s1]); int n2 = WritableUtils.decodeVIntSize(b2[s2]); - return compareBytes(b1, s1+n1, l1-n1, b2, s2+n2, l2-n2); + return compareBytes(b1, s1 + n1, l1 - n1, b2, s2 + n2, l2 - n2); } } @@ -383,12 +395,12 @@ public int compare(byte[] b1, int s1, int l1, public static String decode(byte[] utf8) throws CharacterCodingException { return decode(ByteBuffer.wrap(utf8), true); } - + public static String decode(byte[] utf8, int start, int length) throws CharacterCodingException { return decode(ByteBuffer.wrap(utf8, start, length), true); } - + /** * Converts the provided byte array to a String using the * UTF-8 encoding. If replace is true, then @@ -400,7 +412,7 @@ public static String decode(byte[] utf8, int start, int length, boolean replace) throws CharacterCodingException { return decode(ByteBuffer.wrap(utf8, start, length), replace); } - + private static String decode(ByteBuffer utf8, boolean replace) throws CharacterCodingException { CharsetDecoder decoder = DECODER_FACTORY.get(); @@ -463,7 +475,7 @@ public static ByteBuffer encode(String string, boolean replace) public static String readString(DataInput in) throws IOException { return readString(in, Integer.MAX_VALUE); } - + /** Read a UTF8 encoded string with a maximum size */ public static String readString(DataInput in, int maxLength) @@ -473,8 +485,9 @@ public static String readString(DataInput in, int maxLength) in.readFully(bytes, 0, length); return decode(bytes); } - - /** Write a UTF8 encoded string to out + + /** + * Write a UTF8 encoded string to out. */ public static int writeString(DataOutput out, String s) throws IOException { ByteBuffer bytes = encode(s); @@ -484,7 +497,8 @@ public static int writeString(DataOutput out, String s) throws IOException { return length; } - /** Write a UTF8 encoded string with a maximum size to out + /** + * Write a UTF8 encoded string with a maximum size to out. */ public static int writeString(DataOutput out, String s, int maxLength) throws IOException { @@ -501,24 +515,26 @@ public static int writeString(DataOutput out, String s, int maxLength) } ////// states for validateUTF8 - + private static final int LEAD_BYTE = 0; private static final int TRAIL_BYTE_1 = 1; private static final int TRAIL_BYTE = 2; - /** - * Check if a byte array contains valid utf-8 + /** + * Check if a byte array contains valid UTF-8. + * * @param utf8 byte array - * @throws MalformedInputException if the byte array contains invalid utf-8 + * @throws MalformedInputException if the byte array contains invalid UTF-8 */ public static void validateUTF8(byte[] utf8) throws MalformedInputException { - validateUTF8(utf8, 0, utf8.length); + validateUTF8(utf8, 0, utf8.length); } - + /** - * Check to see if a byte array is valid utf-8 + * Check to see if a byte array is valid UTF-8. + * * @param utf8 the array of bytes * @param start the offset of the first byte in the array * @param len the length of the byte sequence @@ -641,7 +657,6 @@ public static int bytesToCodePoint(ByteBuffer bytes) { return ch; } - static final int offsetsFromUTF8[] = { 0x00000000, 0x00003080, 0x000E2080, 0x03C82080, 0xFA082080, 0x82082080 }; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java index 59856a4de1..54df39955d 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java @@ -291,9 +291,9 @@ public void testTextText() throws CharacterCodingException { a.append("xdefgxxx".getBytes(), 1, 4); assertEquals("modified aliased string", "abc", b.toString()); assertEquals("appended string incorrectly", "abcdefg", a.toString()); - // add an extra byte so that capacity = 14 and length = 8 + // add an extra byte so that capacity = 10 and length = 8 a.append(new byte[]{'d'}, 0, 1); - assertEquals(14, a.getBytes().length); + assertEquals(10, a.getBytes().length); assertEquals(8, a.copyBytes().length); }