HADOOP-16951: Tidy Up Text and ByteWritables Classes.

1. Remove superfluous code
2. Remove superfluous comments
3. Checkstyle fixes
4. Remove methods that simply call super.method()
5. Use Java 8 facilities to streamline code where applicable
6. Simplify and unify some of the constructs between the two classes
7. Expanding of the arrays be 1.5x instead of 2x per expansion.
This commit is contained in:
belugabehr 2020-04-17 11:16:12 -04:00 committed by GitHub
parent 2fe122e322
commit eca05917d6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 147 additions and 143 deletions

View File

@ -19,6 +19,9 @@
package org.apache.hadoop.io;
import java.io.IOException;
import java.util.Arrays;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.io.DataInput;
import java.io.DataOutput;
@ -36,16 +39,20 @@
public class BytesWritable extends BinaryComparable
implements WritableComparable<BinaryComparable> {
private static final int LENGTH_BYTES = 4;
private static final byte[] EMPTY_BYTES = {};
private static final byte[] EMPTY_BYTES = new byte[0];
private int size;
private byte[] bytes;
/**
* Create a zero-size sequence.
*/
public BytesWritable() {this(EMPTY_BYTES);}
public BytesWritable() {
this.bytes = EMPTY_BYTES;
this.size = 0;
}
/**
* Create a BytesWritable using the byte array as the initial value.
* @param bytes This array becomes the backing storage for the object.
@ -65,17 +72,15 @@ public BytesWritable(byte[] bytes, int length) {
this.bytes = bytes;
this.size = length;
}
/**
* Get a copy of the bytes that is exactly the length of the data.
* See {@link #getBytes()} for faster access to the underlying array.
*/
public byte[] copyBytes() {
byte[] result = new byte[size];
System.arraycopy(bytes, 0, result, 0, size);
return result;
return Arrays.copyOf(bytes, size);
}
/**
* Get the data backing the BytesWritable. Please use {@link #copyBytes()}
* if you need the returned array to be precisely the length of the data.
@ -111,7 +116,7 @@ public int getLength() {
public int getSize() {
return getLength();
}
/**
* Change the size of the buffer. The values in the old range are preserved
* and any new values are undefined. The capacity is changed if it is
@ -126,36 +131,32 @@ public void setSize(int size) {
}
this.size = size;
}
/**
* Get the capacity, which is the maximum size that could handled without
* resizing the backing storage.
*
* @return The number of bytes
*/
public int getCapacity() {
return bytes.length;
}
/**
* Change the capacity of the backing storage.
* The data is preserved.
* @param new_cap The new capacity in bytes.
* Change the capacity of the backing storage. The data is preserved.
*
* @param capacity The new capacity in bytes.
*/
public void setCapacity(int new_cap) {
if (new_cap != getCapacity()) {
byte[] new_data = new byte[new_cap];
if (new_cap < size) {
size = new_cap;
}
if (size != 0) {
System.arraycopy(bytes, 0, new_data, 0, size);
}
bytes = new_data;
public void setCapacity(final int capacity) {
if (capacity != getCapacity()) {
this.size = Math.min(size, capacity);
this.bytes = Arrays.copyOf(this.bytes, capacity);
}
}
/**
* Set the BytesWritable to the contents of the given newData.
*
* @param newData the value to set this BytesWritable to.
*/
public void set(BytesWritable newData) {
@ -163,7 +164,8 @@ public void set(BytesWritable newData) {
}
/**
* Set the value to a copy of the given byte range
* Set the value to a copy of the given byte range.
*
* @param newData the new values to copy in
* @param offset the offset in newData to start at
* @param length the number of bytes to copy
@ -174,25 +176,18 @@ public void set(byte[] newData, int offset, int length) {
System.arraycopy(newData, offset, bytes, 0, size);
}
// inherit javadoc
@Override
public void readFields(DataInput in) throws IOException {
setSize(0); // clear the old data
setSize(in.readInt());
in.readFully(bytes, 0, size);
}
// inherit javadoc
@Override
public void write(DataOutput out) throws IOException {
out.writeInt(size);
out.write(bytes, 0, size);
}
@Override
public int hashCode() {
return super.hashCode();
}
/**
* Are the two byte sequences equal?
@ -204,25 +199,19 @@ public boolean equals(Object right_obj) {
return false;
}
@Override
public int hashCode() {
return super.hashCode();
}
/**
* Generate the stream of bytes as hex pairs separated by ' '.
*/
@Override
public String toString() {
StringBuilder sb = new StringBuilder(3*size);
for (int idx = 0; idx < size; idx++) {
// if not the first, put a blank separator in
if (idx != 0) {
sb.append(' ');
}
String num = Integer.toHexString(0xff & bytes[idx]);
// if it is only one digit, add a leading 0.
if (num.length() < 2) {
sb.append('0');
}
sb.append(num);
}
return sb.toString();
public String toString() {
return IntStream.range(0, size)
.mapToObj(idx -> String.format("%02x", bytes[idx]))
.collect(Collectors.joining(" "));
}
/** A Comparator optimized for BytesWritable. */
@ -230,20 +219,20 @@ public static class Comparator extends WritableComparator {
public Comparator() {
super(BytesWritable.class);
}
/**
* Compare the buffers in serialized form.
*/
@Override
public int compare(byte[] b1, int s1, int l1,
byte[] b2, int s2, int l2) {
return compareBytes(b1, s1+LENGTH_BYTES, l1-LENGTH_BYTES,
b2, s2+LENGTH_BYTES, l2-LENGTH_BYTES);
return compareBytes(b1, s1 + LENGTH_BYTES, l1 - LENGTH_BYTES,
b2, s2 + LENGTH_BYTES, l2 - LENGTH_BYTES);
}
}
static { // register this comparator
WritableComparator.define(BytesWritable.class, new Comparator());
}
}

View File

@ -24,11 +24,11 @@
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.MalformedInputException;
import java.nio.charset.StandardCharsets;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.Arrays;
@ -52,63 +52,67 @@
@InterfaceStability.Stable
public class Text extends BinaryComparable
implements WritableComparable<BinaryComparable> {
private static final ThreadLocal<CharsetEncoder> ENCODER_FACTORY =
new ThreadLocal<CharsetEncoder>() {
@Override
protected CharsetEncoder initialValue() {
return Charset.forName("UTF-8").newEncoder().
return StandardCharsets.UTF_8.newEncoder().
onMalformedInput(CodingErrorAction.REPORT).
onUnmappableCharacter(CodingErrorAction.REPORT);
}
};
private static final ThreadLocal<CharsetDecoder> DECODER_FACTORY =
new ThreadLocal<CharsetDecoder>() {
@Override
protected CharsetDecoder initialValue() {
return Charset.forName("UTF-8").newDecoder().
return StandardCharsets.UTF_8.newDecoder().
onMalformedInput(CodingErrorAction.REPORT).
onUnmappableCharacter(CodingErrorAction.REPORT);
}
};
private static final byte [] EMPTY_BYTES = new byte[0];
private byte[] bytes;
private int length;
private static final byte[] EMPTY_BYTES = new byte[0];
private byte[] bytes = EMPTY_BYTES;
private int length = 0;
/**
* Construct an empty text string.
*/
public Text() {
bytes = EMPTY_BYTES;
}
/** Construct from a string.
/**
* Construct from a string.
*/
public Text(String string) {
set(string);
}
/** Construct from another text. */
/**
* Construct from another text.
*/
public Text(Text utf8) {
set(utf8);
}
/** Construct from a byte array.
/**
* Construct from a byte array.
*/
public Text(byte[] utf8) {
set(utf8);
}
/**
* Get a copy of the bytes that is exactly the length of the data.
* See {@link #getBytes()} for faster access to the underlying array.
*/
public byte[] copyBytes() {
byte[] result = new byte[length];
System.arraycopy(bytes, 0, result, 0, length);
return result;
return Arrays.copyOf(bytes, length);
}
/**
* Returns the raw bytes; however, only data up to {@link #getLength()} is
* valid. Please use {@link #copyBytes()} if you
@ -119,12 +123,14 @@ public byte[] getBytes() {
return bytes;
}
/** Returns the number of bytes in the byte array */
/**
* Returns the number of bytes in the byte array.
*/
@Override
public int getLength() {
return length;
}
/**
* Returns the Unicode Scalar Value (32-bit integer value)
* for the character at <code>position</code>. Note that this
@ -136,15 +142,15 @@ public int getLength() {
public int charAt(int position) {
if (position > this.length) return -1; // too long
if (position < 0) return -1; // duh.
ByteBuffer bb = (ByteBuffer)ByteBuffer.wrap(bytes).position(position);
return bytesToCodePoint(bb.slice());
}
public int find(String what) {
return find(what, 0);
}
/**
* Finds any occurrence of <code>what</code> in the backing
* buffer, starting as position <code>start</code>. The starting
@ -156,11 +162,11 @@ public int find(String what) {
*/
public int find(String what, int start) {
try {
ByteBuffer src = ByteBuffer.wrap(this.bytes,0,this.length);
ByteBuffer src = ByteBuffer.wrap(this.bytes, 0, this.length);
ByteBuffer tgt = encode(what);
byte b = tgt.get();
src.position(start);
while (src.hasRemaining()) {
if (b == src.get()) { // matching first byte
src.mark(); // save position in loop
@ -186,54 +192,63 @@ public int find(String what, int start) {
}
return -1; // not found
} catch (CharacterCodingException e) {
// can't get here
e.printStackTrace();
return -1;
throw new RuntimeException("Should not have happened", e);
}
}
/** Set to contain the contents of a string.
}
/**
* Set to contain the contents of a string.
*/
public void set(String string) {
try {
ByteBuffer bb = encode(string, true);
bytes = bb.array();
length = bb.limit();
}catch(CharacterCodingException e) {
throw new RuntimeException("Should not have happened ", e);
} catch (CharacterCodingException e) {
throw new RuntimeException("Should not have happened", e);
}
}
/** Set to a utf8 byte array
/**
* Set to a utf8 byte array.
*/
public void set(byte[] utf8) {
set(utf8, 0, utf8.length);
}
/** copy a text. */
/**
* Copy a text.
*/
public void set(Text other) {
set(other.getBytes(), 0, other.getLength());
}
/**
* Set the Text to range of bytes
* Set the Text to range of bytes.
*
* @param utf8 the data to copy from
* @param start the first position of the new string
* @param len the number of bytes of the new string
*/
public void set(byte[] utf8, int start, int len) {
setCapacity(len, false);
ensureCapacity(len);
System.arraycopy(utf8, start, bytes, 0, len);
this.length = len;
}
/**
* Append a range of bytes to the end of the given text
* Append a range of bytes to the end of the given text.
*
* @param utf8 the data to copy from
* @param start the first position to append from utf8
* @param len the number of bytes to append
*/
public void append(byte[] utf8, int start, int len) {
setCapacity(length + len, true);
byte[] original = bytes;
int capacity = Math.max(length + len, length + (length >> 1));
if (ensureCapacity(capacity)) {
System.arraycopy(original, 0, bytes, 0, length);
}
System.arraycopy(utf8, start, bytes, length, len);
length += len;
}
@ -250,47 +265,39 @@ public void clear() {
length = 0;
}
/*
/**
* Sets the capacity of this Text object to <em>at least</em>
* <code>len</code> bytes. If the current buffer is longer,
* then the capacity and existing content of the buffer are
* unchanged. If <code>len</code> is larger
* than the current capacity, the Text object's capacity is
* increased to match.
* @param len the number of bytes we need
* @param keepData should the old data be kept
* <code>capacity</code> bytes. If the current buffer is longer, then the
* capacity and existing content of the buffer are unchanged. If
* <code>capacity</code> is larger than the current capacity, the Text
* object's capacity is increased to match and any existing data is lost.
*
* @param capacity the number of bytes we need
* @return true if the internal array was resized or false otherwise
*/
private void setCapacity(int len, boolean keepData) {
if (bytes == null || bytes.length < len) {
if (bytes != null && keepData) {
bytes = Arrays.copyOf(bytes, Math.max(len,length << 1));
} else {
bytes = new byte[len];
}
private boolean ensureCapacity(final int capacity) {
if (bytes.length < capacity) {
bytes = new byte[capacity];
return true;
}
return false;
}
/**
* Convert text back to string
* @see java.lang.Object#toString()
*/
@Override
public String toString() {
try {
return decode(bytes, 0, length);
} catch (CharacterCodingException e) {
throw new RuntimeException("Should not have happened " , e);
throw new RuntimeException("Should not have happened", e);
}
}
/** deserialize
*/
@Override
public void readFields(DataInput in) throws IOException {
int newLength = WritableUtils.readVInt(in);
readWithKnownLength(in, newLength);
}
public void readFields(DataInput in, int maxLength) throws IOException {
int newLength = WritableUtils.readVInt(in);
if (newLength < 0) {
@ -303,7 +310,9 @@ public void readFields(DataInput in, int maxLength) throws IOException {
readWithKnownLength(in, newLength);
}
/** Skips over one Text in the input. */
/**
* Skips over one Text in the input.
*/
public static void skip(DataInput in) throws IOException {
int length = WritableUtils.readVInt(in);
WritableUtils.skipFully(in, length);
@ -315,14 +324,14 @@ public static void skip(DataInput in) throws IOException {
* format.
*/
public void readWithKnownLength(DataInput in, int len) throws IOException {
setCapacity(len, false);
ensureCapacity(len);
in.readFully(bytes, 0, len);
length = len;
}
/** serialize
* write this object to out
* length uses zero-compressed encoding
/**
* Serialize. Write this object to out length uses zero-compressed encoding.
*
* @see Writable#write(DataOutput)
*/
@Override
@ -341,7 +350,10 @@ public void write(DataOutput out, int maxLength) throws IOException {
out.write(bytes, 0, length);
}
/** Returns true iff <code>o</code> is a Text with the same contents. */
/**
* Returns true iff <code>o</code> is a Text with the same length and same
* contents.
*/
@Override
public boolean equals(Object o) {
if (o instanceof Text)
@ -365,7 +377,7 @@ public int compare(byte[] b1, int s1, int l1,
byte[] b2, int s2, int l2) {
int n1 = WritableUtils.decodeVIntSize(b1[s1]);
int n2 = WritableUtils.decodeVIntSize(b2[s2]);
return compareBytes(b1, s1+n1, l1-n1, b2, s2+n2, l2-n2);
return compareBytes(b1, s1 + n1, l1 - n1, b2, s2 + n2, l2 - n2);
}
}
@ -383,12 +395,12 @@ public int compare(byte[] b1, int s1, int l1,
public static String decode(byte[] utf8) throws CharacterCodingException {
return decode(ByteBuffer.wrap(utf8), true);
}
public static String decode(byte[] utf8, int start, int length)
throws CharacterCodingException {
return decode(ByteBuffer.wrap(utf8, start, length), true);
}
/**
* Converts the provided byte array to a String using the
* UTF-8 encoding. If <code>replace</code> is true, then
@ -400,7 +412,7 @@ public static String decode(byte[] utf8, int start, int length, boolean replace)
throws CharacterCodingException {
return decode(ByteBuffer.wrap(utf8, start, length), replace);
}
private static String decode(ByteBuffer utf8, boolean replace)
throws CharacterCodingException {
CharsetDecoder decoder = DECODER_FACTORY.get();
@ -463,7 +475,7 @@ public static ByteBuffer encode(String string, boolean replace)
public static String readString(DataInput in) throws IOException {
return readString(in, Integer.MAX_VALUE);
}
/** Read a UTF8 encoded string with a maximum size
*/
public static String readString(DataInput in, int maxLength)
@ -473,8 +485,9 @@ public static String readString(DataInput in, int maxLength)
in.readFully(bytes, 0, length);
return decode(bytes);
}
/** Write a UTF8 encoded string to out
/**
* Write a UTF8 encoded string to out.
*/
public static int writeString(DataOutput out, String s) throws IOException {
ByteBuffer bytes = encode(s);
@ -484,7 +497,8 @@ public static int writeString(DataOutput out, String s) throws IOException {
return length;
}
/** Write a UTF8 encoded string with a maximum size to out
/**
* Write a UTF8 encoded string with a maximum size to out.
*/
public static int writeString(DataOutput out, String s, int maxLength)
throws IOException {
@ -501,24 +515,26 @@ public static int writeString(DataOutput out, String s, int maxLength)
}
////// states for validateUTF8
private static final int LEAD_BYTE = 0;
private static final int TRAIL_BYTE_1 = 1;
private static final int TRAIL_BYTE = 2;
/**
* Check if a byte array contains valid utf-8
/**
* Check if a byte array contains valid UTF-8.
*
* @param utf8 byte array
* @throws MalformedInputException if the byte array contains invalid utf-8
* @throws MalformedInputException if the byte array contains invalid UTF-8
*/
public static void validateUTF8(byte[] utf8) throws MalformedInputException {
validateUTF8(utf8, 0, utf8.length);
validateUTF8(utf8, 0, utf8.length);
}
/**
* Check to see if a byte array is valid utf-8
* Check to see if a byte array is valid UTF-8.
*
* @param utf8 the array of bytes
* @param start the offset of the first byte in the array
* @param len the length of the byte sequence
@ -641,7 +657,6 @@ public static int bytesToCodePoint(ByteBuffer bytes) {
return ch;
}
static final int offsetsFromUTF8[] =
{ 0x00000000, 0x00003080,
0x000E2080, 0x03C82080, 0xFA082080, 0x82082080 };

View File

@ -291,9 +291,9 @@ public void testTextText() throws CharacterCodingException {
a.append("xdefgxxx".getBytes(), 1, 4);
assertEquals("modified aliased string", "abc", b.toString());
assertEquals("appended string incorrectly", "abcdefg", a.toString());
// add an extra byte so that capacity = 14 and length = 8
// add an extra byte so that capacity = 10 and length = 8
a.append(new byte[]{'d'}, 0, 1);
assertEquals(14, a.getBytes().length);
assertEquals(10, a.getBytes().length);
assertEquals(8, a.copyBytes().length);
}