HADOOP-16951: Tidy Up Text and BytesWritable Classes.

1. Remove superfluous code
2. Remove superfluous comments
3. Checkstyle fixes
4. Remove methods that simply call super.method()
5. Use Java 8 facilities to streamline code where applicable
6. Simplify and unify some of the constructs between the two classes
7. Expand the arrays by 1.5x instead of 2x per expansion.
This commit is contained in:
belugabehr 2020-04-17 11:16:12 -04:00 committed by GitHub
parent 2fe122e322
commit eca05917d6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 147 additions and 143 deletions

View File

@ -19,6 +19,9 @@
package org.apache.hadoop.io; package org.apache.hadoop.io;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.io.DataInput; import java.io.DataInput;
import java.io.DataOutput; import java.io.DataOutput;
@ -36,7 +39,8 @@
public class BytesWritable extends BinaryComparable public class BytesWritable extends BinaryComparable
implements WritableComparable<BinaryComparable> { implements WritableComparable<BinaryComparable> {
private static final int LENGTH_BYTES = 4; private static final int LENGTH_BYTES = 4;
private static final byte[] EMPTY_BYTES = {};
private static final byte[] EMPTY_BYTES = new byte[0];
private int size; private int size;
private byte[] bytes; private byte[] bytes;
@ -44,7 +48,10 @@ public class BytesWritable extends BinaryComparable
/** /**
* Create a zero-size sequence. * Create a zero-size sequence.
*/ */
public BytesWritable() {this(EMPTY_BYTES);} public BytesWritable() {
this.bytes = EMPTY_BYTES;
this.size = 0;
}
/** /**
* Create a BytesWritable using the byte array as the initial value. * Create a BytesWritable using the byte array as the initial value.
@ -71,9 +78,7 @@ public BytesWritable(byte[] bytes, int length) {
* See {@link #getBytes()} for faster access to the underlying array. * See {@link #getBytes()} for faster access to the underlying array.
*/ */
public byte[] copyBytes() { public byte[] copyBytes() {
byte[] result = new byte[size]; return Arrays.copyOf(bytes, size);
System.arraycopy(bytes, 0, result, 0, size);
return result;
} }
/** /**
@ -130,6 +135,7 @@ public void setSize(int size) {
/** /**
* Get the capacity, which is the maximum size that could handled without * Get the capacity, which is the maximum size that could handled without
* resizing the backing storage. * resizing the backing storage.
*
* @return The number of bytes * @return The number of bytes
*/ */
public int getCapacity() { public int getCapacity() {
@ -137,25 +143,20 @@ public int getCapacity() {
} }
/** /**
* Change the capacity of the backing storage. * Change the capacity of the backing storage. The data is preserved.
* The data is preserved. *
* @param new_cap The new capacity in bytes. * @param capacity The new capacity in bytes.
*/ */
public void setCapacity(int new_cap) { public void setCapacity(final int capacity) {
if (new_cap != getCapacity()) { if (capacity != getCapacity()) {
byte[] new_data = new byte[new_cap]; this.size = Math.min(size, capacity);
if (new_cap < size) { this.bytes = Arrays.copyOf(this.bytes, capacity);
size = new_cap;
}
if (size != 0) {
System.arraycopy(bytes, 0, new_data, 0, size);
}
bytes = new_data;
} }
} }
/** /**
* Set the BytesWritable to the contents of the given newData. * Set the BytesWritable to the contents of the given newData.
*
* @param newData the value to set this BytesWritable to. * @param newData the value to set this BytesWritable to.
*/ */
public void set(BytesWritable newData) { public void set(BytesWritable newData) {
@ -163,7 +164,8 @@ public void set(BytesWritable newData) {
} }
/** /**
* Set the value to a copy of the given byte range * Set the value to a copy of the given byte range.
*
* @param newData the new values to copy in * @param newData the new values to copy in
* @param offset the offset in newData to start at * @param offset the offset in newData to start at
* @param length the number of bytes to copy * @param length the number of bytes to copy
@ -174,7 +176,6 @@ public void set(byte[] newData, int offset, int length) {
System.arraycopy(newData, offset, bytes, 0, size); System.arraycopy(newData, offset, bytes, 0, size);
} }
// inherit javadoc
@Override @Override
public void readFields(DataInput in) throws IOException { public void readFields(DataInput in) throws IOException {
setSize(0); // clear the old data setSize(0); // clear the old data
@ -182,18 +183,12 @@ public void readFields(DataInput in) throws IOException {
in.readFully(bytes, 0, size); in.readFully(bytes, 0, size);
} }
// inherit javadoc
@Override @Override
public void write(DataOutput out) throws IOException { public void write(DataOutput out) throws IOException {
out.writeInt(size); out.writeInt(size);
out.write(bytes, 0, size); out.write(bytes, 0, size);
} }
@Override
public int hashCode() {
return super.hashCode();
}
/** /**
* Are the two byte sequences equal? * Are the two byte sequences equal?
*/ */
@ -204,25 +199,19 @@ public boolean equals(Object right_obj) {
return false; return false;
} }
@Override
public int hashCode() {
return super.hashCode();
}
/** /**
* Generate the stream of bytes as hex pairs separated by ' '. * Generate the stream of bytes as hex pairs separated by ' '.
*/ */
@Override @Override
public String toString() { public String toString() {
StringBuilder sb = new StringBuilder(3*size); return IntStream.range(0, size)
for (int idx = 0; idx < size; idx++) { .mapToObj(idx -> String.format("%02x", bytes[idx]))
// if not the first, put a blank separator in .collect(Collectors.joining(" "));
if (idx != 0) {
sb.append(' ');
}
String num = Integer.toHexString(0xff & bytes[idx]);
// if it is only one digit, add a leading 0.
if (num.length() < 2) {
sb.append('0');
}
sb.append(num);
}
return sb.toString();
} }
/** A Comparator optimized for BytesWritable. */ /** A Comparator optimized for BytesWritable. */
@ -237,8 +226,8 @@ public Comparator() {
@Override @Override
public int compare(byte[] b1, int s1, int l1, public int compare(byte[] b1, int s1, int l1,
byte[] b2, int s2, int l2) { byte[] b2, int s2, int l2) {
return compareBytes(b1, s1+LENGTH_BYTES, l1-LENGTH_BYTES, return compareBytes(b1, s1 + LENGTH_BYTES, l1 - LENGTH_BYTES,
b2, s2+LENGTH_BYTES, l2-LENGTH_BYTES); b2, s2 + LENGTH_BYTES, l2 - LENGTH_BYTES);
} }
} }

View File

@ -24,11 +24,11 @@
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.CharBuffer; import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException; import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder; import java.nio.charset.CharsetEncoder;
import java.nio.charset.CodingErrorAction; import java.nio.charset.CodingErrorAction;
import java.nio.charset.MalformedInputException; import java.nio.charset.MalformedInputException;
import java.nio.charset.StandardCharsets;
import java.text.CharacterIterator; import java.text.CharacterIterator;
import java.text.StringCharacterIterator; import java.text.StringCharacterIterator;
import java.util.Arrays; import java.util.Arrays;
@ -57,7 +57,7 @@ public class Text extends BinaryComparable
new ThreadLocal<CharsetEncoder>() { new ThreadLocal<CharsetEncoder>() {
@Override @Override
protected CharsetEncoder initialValue() { protected CharsetEncoder initialValue() {
return Charset.forName("UTF-8").newEncoder(). return StandardCharsets.UTF_8.newEncoder().
onMalformedInput(CodingErrorAction.REPORT). onMalformedInput(CodingErrorAction.REPORT).
onUnmappableCharacter(CodingErrorAction.REPORT); onUnmappableCharacter(CodingErrorAction.REPORT);
} }
@ -67,33 +67,39 @@ protected CharsetEncoder initialValue() {
new ThreadLocal<CharsetDecoder>() { new ThreadLocal<CharsetDecoder>() {
@Override @Override
protected CharsetDecoder initialValue() { protected CharsetDecoder initialValue() {
return Charset.forName("UTF-8").newDecoder(). return StandardCharsets.UTF_8.newDecoder().
onMalformedInput(CodingErrorAction.REPORT). onMalformedInput(CodingErrorAction.REPORT).
onUnmappableCharacter(CodingErrorAction.REPORT); onUnmappableCharacter(CodingErrorAction.REPORT);
} }
}; };
private static final byte [] EMPTY_BYTES = new byte[0]; private static final byte[] EMPTY_BYTES = new byte[0];
private byte[] bytes; private byte[] bytes = EMPTY_BYTES;
private int length; private int length = 0;
/**
* Construct an empty text string.
*/
public Text() { public Text() {
bytes = EMPTY_BYTES;
} }
/** Construct from a string. /**
* Construct from a string.
*/ */
public Text(String string) { public Text(String string) {
set(string); set(string);
} }
/** Construct from another text. */ /**
* Construct from another text.
*/
public Text(Text utf8) { public Text(Text utf8) {
set(utf8); set(utf8);
} }
/** Construct from a byte array. /**
* Construct from a byte array.
*/ */
public Text(byte[] utf8) { public Text(byte[] utf8) {
set(utf8); set(utf8);
@ -104,9 +110,7 @@ public Text(byte[] utf8) {
* See {@link #getBytes()} for faster access to the underlying array. * See {@link #getBytes()} for faster access to the underlying array.
*/ */
public byte[] copyBytes() { public byte[] copyBytes() {
byte[] result = new byte[length]; return Arrays.copyOf(bytes, length);
System.arraycopy(bytes, 0, result, 0, length);
return result;
} }
/** /**
@ -119,7 +123,9 @@ public byte[] getBytes() {
return bytes; return bytes;
} }
/** Returns the number of bytes in the byte array */ /**
* Returns the number of bytes in the byte array.
*/
@Override @Override
public int getLength() { public int getLength() {
return length; return length;
@ -156,7 +162,7 @@ public int find(String what) {
*/ */
public int find(String what, int start) { public int find(String what, int start) {
try { try {
ByteBuffer src = ByteBuffer.wrap(this.bytes,0,this.length); ByteBuffer src = ByteBuffer.wrap(this.bytes, 0, this.length);
ByteBuffer tgt = encode(what); ByteBuffer tgt = encode(what);
byte b = tgt.get(); byte b = tgt.get();
src.position(start); src.position(start);
@ -186,54 +192,63 @@ public int find(String what, int start) {
} }
return -1; // not found return -1; // not found
} catch (CharacterCodingException e) { } catch (CharacterCodingException e) {
// can't get here throw new RuntimeException("Should not have happened", e);
e.printStackTrace();
return -1;
} }
} }
/** Set to contain the contents of a string.
/**
* Set to contain the contents of a string.
*/ */
public void set(String string) { public void set(String string) {
try { try {
ByteBuffer bb = encode(string, true); ByteBuffer bb = encode(string, true);
bytes = bb.array(); bytes = bb.array();
length = bb.limit(); length = bb.limit();
}catch(CharacterCodingException e) { } catch (CharacterCodingException e) {
throw new RuntimeException("Should not have happened ", e); throw new RuntimeException("Should not have happened", e);
} }
} }
/** Set to a utf8 byte array /**
* Set to a utf8 byte array.
*/ */
public void set(byte[] utf8) { public void set(byte[] utf8) {
set(utf8, 0, utf8.length); set(utf8, 0, utf8.length);
} }
/** copy a text. */ /**
* Copy a text.
*/
public void set(Text other) { public void set(Text other) {
set(other.getBytes(), 0, other.getLength()); set(other.getBytes(), 0, other.getLength());
} }
/** /**
* Set the Text to range of bytes * Set the Text to range of bytes.
*
* @param utf8 the data to copy from * @param utf8 the data to copy from
* @param start the first position of the new string * @param start the first position of the new string
* @param len the number of bytes of the new string * @param len the number of bytes of the new string
*/ */
public void set(byte[] utf8, int start, int len) { public void set(byte[] utf8, int start, int len) {
setCapacity(len, false); ensureCapacity(len);
System.arraycopy(utf8, start, bytes, 0, len); System.arraycopy(utf8, start, bytes, 0, len);
this.length = len; this.length = len;
} }
/** /**
* Append a range of bytes to the end of the given text * Append a range of bytes to the end of the given text.
*
* @param utf8 the data to copy from * @param utf8 the data to copy from
* @param start the first position to append from utf8 * @param start the first position to append from utf8
* @param len the number of bytes to append * @param len the number of bytes to append
*/ */
public void append(byte[] utf8, int start, int len) { public void append(byte[] utf8, int start, int len) {
setCapacity(length + len, true); byte[] original = bytes;
int capacity = Math.max(length + len, length + (length >> 1));
if (ensureCapacity(capacity)) {
System.arraycopy(original, 0, bytes, 0, length);
}
System.arraycopy(utf8, start, bytes, length, len); System.arraycopy(utf8, start, bytes, length, len);
length += len; length += len;
} }
@ -250,41 +265,33 @@ public void clear() {
length = 0; length = 0;
} }
/* /**
* Sets the capacity of this Text object to <em>at least</em> * Sets the capacity of this Text object to <em>at least</em>
* <code>len</code> bytes. If the current buffer is longer, * <code>capacity</code> bytes. If the current buffer is longer, then the
* then the capacity and existing content of the buffer are * capacity and existing content of the buffer are unchanged. If
* unchanged. If <code>len</code> is larger * <code>capacity</code> is larger than the current capacity, the Text
* than the current capacity, the Text object's capacity is * object's capacity is increased to match and any existing data is lost.
* increased to match. *
* @param len the number of bytes we need * @param capacity the number of bytes we need
* @param keepData should the old data be kept * @return true if the internal array was resized or false otherwise
*/ */
private void setCapacity(int len, boolean keepData) { private boolean ensureCapacity(final int capacity) {
if (bytes == null || bytes.length < len) { if (bytes.length < capacity) {
if (bytes != null && keepData) { bytes = new byte[capacity];
bytes = Arrays.copyOf(bytes, Math.max(len,length << 1)); return true;
} else {
bytes = new byte[len];
}
} }
return false;
} }
/**
* Convert text back to string
* @see java.lang.Object#toString()
*/
@Override @Override
public String toString() { public String toString() {
try { try {
return decode(bytes, 0, length); return decode(bytes, 0, length);
} catch (CharacterCodingException e) { } catch (CharacterCodingException e) {
throw new RuntimeException("Should not have happened " , e); throw new RuntimeException("Should not have happened", e);
} }
} }
/** deserialize
*/
@Override @Override
public void readFields(DataInput in) throws IOException { public void readFields(DataInput in) throws IOException {
int newLength = WritableUtils.readVInt(in); int newLength = WritableUtils.readVInt(in);
@ -303,7 +310,9 @@ public void readFields(DataInput in, int maxLength) throws IOException {
readWithKnownLength(in, newLength); readWithKnownLength(in, newLength);
} }
/** Skips over one Text in the input. */ /**
* Skips over one Text in the input.
*/
public static void skip(DataInput in) throws IOException { public static void skip(DataInput in) throws IOException {
int length = WritableUtils.readVInt(in); int length = WritableUtils.readVInt(in);
WritableUtils.skipFully(in, length); WritableUtils.skipFully(in, length);
@ -315,14 +324,14 @@ public static void skip(DataInput in) throws IOException {
* format. * format.
*/ */
public void readWithKnownLength(DataInput in, int len) throws IOException { public void readWithKnownLength(DataInput in, int len) throws IOException {
setCapacity(len, false); ensureCapacity(len);
in.readFully(bytes, 0, len); in.readFully(bytes, 0, len);
length = len; length = len;
} }
/** serialize /**
* write this object to out * Serialize. Write this object to out length uses zero-compressed encoding.
* length uses zero-compressed encoding *
* @see Writable#write(DataOutput) * @see Writable#write(DataOutput)
*/ */
@Override @Override
@ -341,7 +350,10 @@ public void write(DataOutput out, int maxLength) throws IOException {
out.write(bytes, 0, length); out.write(bytes, 0, length);
} }
/** Returns true iff <code>o</code> is a Text with the same contents. */ /**
* Returns true iff <code>o</code> is a Text with the same length and same
* contents.
*/
@Override @Override
public boolean equals(Object o) { public boolean equals(Object o) {
if (o instanceof Text) if (o instanceof Text)
@ -365,7 +377,7 @@ public int compare(byte[] b1, int s1, int l1,
byte[] b2, int s2, int l2) { byte[] b2, int s2, int l2) {
int n1 = WritableUtils.decodeVIntSize(b1[s1]); int n1 = WritableUtils.decodeVIntSize(b1[s1]);
int n2 = WritableUtils.decodeVIntSize(b2[s2]); int n2 = WritableUtils.decodeVIntSize(b2[s2]);
return compareBytes(b1, s1+n1, l1-n1, b2, s2+n2, l2-n2); return compareBytes(b1, s1 + n1, l1 - n1, b2, s2 + n2, l2 - n2);
} }
} }
@ -474,7 +486,8 @@ public static String readString(DataInput in, int maxLength)
return decode(bytes); return decode(bytes);
} }
/** Write a UTF8 encoded string to out /**
* Write a UTF8 encoded string to out.
*/ */
public static int writeString(DataOutput out, String s) throws IOException { public static int writeString(DataOutput out, String s) throws IOException {
ByteBuffer bytes = encode(s); ByteBuffer bytes = encode(s);
@ -484,7 +497,8 @@ public static int writeString(DataOutput out, String s) throws IOException {
return length; return length;
} }
/** Write a UTF8 encoded string with a maximum size to out /**
* Write a UTF8 encoded string with a maximum size to out.
*/ */
public static int writeString(DataOutput out, String s, int maxLength) public static int writeString(DataOutput out, String s, int maxLength)
throws IOException { throws IOException {
@ -509,16 +523,18 @@ public static int writeString(DataOutput out, String s, int maxLength)
private static final int TRAIL_BYTE = 2; private static final int TRAIL_BYTE = 2;
/** /**
* Check if a byte array contains valid utf-8 * Check if a byte array contains valid UTF-8.
*
* @param utf8 byte array * @param utf8 byte array
* @throws MalformedInputException if the byte array contains invalid utf-8 * @throws MalformedInputException if the byte array contains invalid UTF-8
*/ */
public static void validateUTF8(byte[] utf8) throws MalformedInputException { public static void validateUTF8(byte[] utf8) throws MalformedInputException {
validateUTF8(utf8, 0, utf8.length); validateUTF8(utf8, 0, utf8.length);
} }
/** /**
* Check to see if a byte array is valid utf-8 * Check to see if a byte array is valid UTF-8.
*
* @param utf8 the array of bytes * @param utf8 the array of bytes
* @param start the offset of the first byte in the array * @param start the offset of the first byte in the array
* @param len the length of the byte sequence * @param len the length of the byte sequence
@ -641,7 +657,6 @@ public static int bytesToCodePoint(ByteBuffer bytes) {
return ch; return ch;
} }
static final int offsetsFromUTF8[] = static final int offsetsFromUTF8[] =
{ 0x00000000, 0x00003080, { 0x00000000, 0x00003080,
0x000E2080, 0x03C82080, 0xFA082080, 0x82082080 }; 0x000E2080, 0x03C82080, 0xFA082080, 0x82082080 };

View File

@ -291,9 +291,9 @@ public void testTextText() throws CharacterCodingException {
a.append("xdefgxxx".getBytes(), 1, 4); a.append("xdefgxxx".getBytes(), 1, 4);
assertEquals("modified aliased string", "abc", b.toString()); assertEquals("modified aliased string", "abc", b.toString());
assertEquals("appended string incorrectly", "abcdefg", a.toString()); assertEquals("appended string incorrectly", "abcdefg", a.toString());
// add an extra byte so that capacity = 14 and length = 8 // add an extra byte so that capacity = 10 and length = 8
a.append(new byte[]{'d'}, 0, 1); a.append(new byte[]{'d'}, 0, 1);
assertEquals(14, a.getBytes().length); assertEquals(10, a.getBytes().length);
assertEquals(8, a.copyBytes().length); assertEquals(8, a.copyBytes().length);
} }