HADOOP-16951: Tidy Up Text and BytesWritable Classes.
1. Remove superfluous code. 2. Remove superfluous comments. 3. Checkstyle fixes. 4. Remove methods that simply call super.method(). 5. Use Java 8 facilities to streamline code where applicable. 6. Simplify and unify some of the constructs between the two classes. 7. Expand the arrays by 1.5x instead of 2x per expansion.
This commit is contained in:
parent
2fe122e322
commit
eca05917d6
@ -19,6 +19,9 @@
|
||||
package org.apache.hadoop.io;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
|
||||
@ -36,7 +39,8 @@
|
||||
public class BytesWritable extends BinaryComparable
|
||||
implements WritableComparable<BinaryComparable> {
|
||||
private static final int LENGTH_BYTES = 4;
|
||||
private static final byte[] EMPTY_BYTES = {};
|
||||
|
||||
private static final byte[] EMPTY_BYTES = new byte[0];
|
||||
|
||||
private int size;
|
||||
private byte[] bytes;
|
||||
@ -44,7 +48,10 @@ public class BytesWritable extends BinaryComparable
|
||||
/**
|
||||
* Create a zero-size sequence.
|
||||
*/
|
||||
public BytesWritable() {this(EMPTY_BYTES);}
|
||||
public BytesWritable() {
|
||||
this.bytes = EMPTY_BYTES;
|
||||
this.size = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a BytesWritable using the byte array as the initial value.
|
||||
@ -71,9 +78,7 @@ public BytesWritable(byte[] bytes, int length) {
|
||||
* See {@link #getBytes()} for faster access to the underlying array.
|
||||
*/
|
||||
public byte[] copyBytes() {
|
||||
byte[] result = new byte[size];
|
||||
System.arraycopy(bytes, 0, result, 0, size);
|
||||
return result;
|
||||
return Arrays.copyOf(bytes, size);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -130,6 +135,7 @@ public void setSize(int size) {
|
||||
/**
|
||||
* Get the capacity, which is the maximum size that could be handled without
|
||||
* resizing the backing storage.
|
||||
*
|
||||
* @return The number of bytes
|
||||
*/
|
||||
public int getCapacity() {
|
||||
@ -137,25 +143,20 @@ public int getCapacity() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the capacity of the backing storage.
|
||||
* The data is preserved.
|
||||
* @param new_cap The new capacity in bytes.
|
||||
* Change the capacity of the backing storage. The data is preserved.
|
||||
*
|
||||
* @param capacity The new capacity in bytes.
|
||||
*/
|
||||
public void setCapacity(int new_cap) {
|
||||
if (new_cap != getCapacity()) {
|
||||
byte[] new_data = new byte[new_cap];
|
||||
if (new_cap < size) {
|
||||
size = new_cap;
|
||||
}
|
||||
if (size != 0) {
|
||||
System.arraycopy(bytes, 0, new_data, 0, size);
|
||||
}
|
||||
bytes = new_data;
|
||||
public void setCapacity(final int capacity) {
|
||||
if (capacity != getCapacity()) {
|
||||
this.size = Math.min(size, capacity);
|
||||
this.bytes = Arrays.copyOf(this.bytes, capacity);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the BytesWritable to the contents of the given newData.
|
||||
*
|
||||
* @param newData the value to set this BytesWritable to.
|
||||
*/
|
||||
public void set(BytesWritable newData) {
|
||||
@ -163,7 +164,8 @@ public void set(BytesWritable newData) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the value to a copy of the given byte range
|
||||
* Set the value to a copy of the given byte range.
|
||||
*
|
||||
* @param newData the new values to copy in
|
||||
* @param offset the offset in newData to start at
|
||||
* @param length the number of bytes to copy
|
||||
@ -174,7 +176,6 @@ public void set(byte[] newData, int offset, int length) {
|
||||
System.arraycopy(newData, offset, bytes, 0, size);
|
||||
}
|
||||
|
||||
// inherit javadoc
|
||||
@Override
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
setSize(0); // clear the old data
|
||||
@ -182,18 +183,12 @@ public void readFields(DataInput in) throws IOException {
|
||||
in.readFully(bytes, 0, size);
|
||||
}
|
||||
|
||||
// inherit javadoc
|
||||
@Override
|
||||
public void write(DataOutput out) throws IOException {
|
||||
out.writeInt(size);
|
||||
out.write(bytes, 0, size);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return super.hashCode();
|
||||
}
|
||||
|
||||
/**
|
||||
* Are the two byte sequences equal?
|
||||
*/
|
||||
@ -204,25 +199,19 @@ public boolean equals(Object right_obj) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return super.hashCode();
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate the stream of bytes as hex pairs separated by ' '.
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder(3*size);
|
||||
for (int idx = 0; idx < size; idx++) {
|
||||
// if not the first, put a blank separator in
|
||||
if (idx != 0) {
|
||||
sb.append(' ');
|
||||
}
|
||||
String num = Integer.toHexString(0xff & bytes[idx]);
|
||||
// if it is only one digit, add a leading 0.
|
||||
if (num.length() < 2) {
|
||||
sb.append('0');
|
||||
}
|
||||
sb.append(num);
|
||||
}
|
||||
return sb.toString();
|
||||
return IntStream.range(0, size)
|
||||
.mapToObj(idx -> String.format("%02x", bytes[idx]))
|
||||
.collect(Collectors.joining(" "));
|
||||
}
|
||||
|
||||
/** A Comparator optimized for BytesWritable. */
|
||||
@ -237,8 +226,8 @@ public Comparator() {
|
||||
@Override
|
||||
public int compare(byte[] b1, int s1, int l1,
|
||||
byte[] b2, int s2, int l2) {
|
||||
return compareBytes(b1, s1+LENGTH_BYTES, l1-LENGTH_BYTES,
|
||||
b2, s2+LENGTH_BYTES, l2-LENGTH_BYTES);
|
||||
return compareBytes(b1, s1 + LENGTH_BYTES, l1 - LENGTH_BYTES,
|
||||
b2, s2 + LENGTH_BYTES, l2 - LENGTH_BYTES);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -24,11 +24,11 @@
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.CharacterCodingException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CodingErrorAction;
|
||||
import java.nio.charset.MalformedInputException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.text.CharacterIterator;
|
||||
import java.text.StringCharacterIterator;
|
||||
import java.util.Arrays;
|
||||
@ -57,7 +57,7 @@ public class Text extends BinaryComparable
|
||||
new ThreadLocal<CharsetEncoder>() {
|
||||
@Override
|
||||
protected CharsetEncoder initialValue() {
|
||||
return Charset.forName("UTF-8").newEncoder().
|
||||
return StandardCharsets.UTF_8.newEncoder().
|
||||
onMalformedInput(CodingErrorAction.REPORT).
|
||||
onUnmappableCharacter(CodingErrorAction.REPORT);
|
||||
}
|
||||
@ -67,33 +67,39 @@ protected CharsetEncoder initialValue() {
|
||||
new ThreadLocal<CharsetDecoder>() {
|
||||
@Override
|
||||
protected CharsetDecoder initialValue() {
|
||||
return Charset.forName("UTF-8").newDecoder().
|
||||
return StandardCharsets.UTF_8.newDecoder().
|
||||
onMalformedInput(CodingErrorAction.REPORT).
|
||||
onUnmappableCharacter(CodingErrorAction.REPORT);
|
||||
}
|
||||
};
|
||||
|
||||
private static final byte [] EMPTY_BYTES = new byte[0];
|
||||
private static final byte[] EMPTY_BYTES = new byte[0];
|
||||
|
||||
private byte[] bytes;
|
||||
private int length;
|
||||
private byte[] bytes = EMPTY_BYTES;
|
||||
private int length = 0;
|
||||
|
||||
/**
|
||||
* Construct an empty text string.
|
||||
*/
|
||||
public Text() {
|
||||
bytes = EMPTY_BYTES;
|
||||
}
|
||||
|
||||
/** Construct from a string.
|
||||
/**
|
||||
* Construct from a string.
|
||||
*/
|
||||
public Text(String string) {
|
||||
set(string);
|
||||
}
|
||||
|
||||
/** Construct from another text. */
|
||||
/**
|
||||
* Construct from another text.
|
||||
*/
|
||||
public Text(Text utf8) {
|
||||
set(utf8);
|
||||
}
|
||||
|
||||
/** Construct from a byte array.
|
||||
/**
|
||||
* Construct from a byte array.
|
||||
*/
|
||||
public Text(byte[] utf8) {
|
||||
set(utf8);
|
||||
@ -104,9 +110,7 @@ public Text(byte[] utf8) {
|
||||
* See {@link #getBytes()} for faster access to the underlying array.
|
||||
*/
|
||||
public byte[] copyBytes() {
|
||||
byte[] result = new byte[length];
|
||||
System.arraycopy(bytes, 0, result, 0, length);
|
||||
return result;
|
||||
return Arrays.copyOf(bytes, length);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -119,7 +123,9 @@ public byte[] getBytes() {
|
||||
return bytes;
|
||||
}
|
||||
|
||||
/** Returns the number of bytes in the byte array */
|
||||
/**
|
||||
* Returns the number of bytes in the byte array.
|
||||
*/
|
||||
@Override
|
||||
public int getLength() {
|
||||
return length;
|
||||
@ -156,7 +162,7 @@ public int find(String what) {
|
||||
*/
|
||||
public int find(String what, int start) {
|
||||
try {
|
||||
ByteBuffer src = ByteBuffer.wrap(this.bytes,0,this.length);
|
||||
ByteBuffer src = ByteBuffer.wrap(this.bytes, 0, this.length);
|
||||
ByteBuffer tgt = encode(what);
|
||||
byte b = tgt.get();
|
||||
src.position(start);
|
||||
@ -186,54 +192,63 @@ public int find(String what, int start) {
|
||||
}
|
||||
return -1; // not found
|
||||
} catch (CharacterCodingException e) {
|
||||
// can't get here
|
||||
e.printStackTrace();
|
||||
return -1;
|
||||
throw new RuntimeException("Should not have happened", e);
|
||||
}
|
||||
}
|
||||
/** Set to contain the contents of a string.
|
||||
|
||||
/**
|
||||
* Set to contain the contents of a string.
|
||||
*/
|
||||
public void set(String string) {
|
||||
try {
|
||||
ByteBuffer bb = encode(string, true);
|
||||
bytes = bb.array();
|
||||
length = bb.limit();
|
||||
}catch(CharacterCodingException e) {
|
||||
throw new RuntimeException("Should not have happened ", e);
|
||||
} catch (CharacterCodingException e) {
|
||||
throw new RuntimeException("Should not have happened", e);
|
||||
}
|
||||
}
|
||||
|
||||
/** Set to a utf8 byte array
|
||||
/**
|
||||
* Set to a utf8 byte array.
|
||||
*/
|
||||
public void set(byte[] utf8) {
|
||||
set(utf8, 0, utf8.length);
|
||||
}
|
||||
|
||||
/** copy a text. */
|
||||
/**
|
||||
* Copy a text.
|
||||
*/
|
||||
public void set(Text other) {
|
||||
set(other.getBytes(), 0, other.getLength());
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the Text to range of bytes
|
||||
* Set the Text to range of bytes.
|
||||
*
|
||||
* @param utf8 the data to copy from
|
||||
* @param start the first position of the new string
|
||||
* @param len the number of bytes of the new string
|
||||
*/
|
||||
public void set(byte[] utf8, int start, int len) {
|
||||
setCapacity(len, false);
|
||||
ensureCapacity(len);
|
||||
System.arraycopy(utf8, start, bytes, 0, len);
|
||||
this.length = len;
|
||||
}
|
||||
|
||||
/**
|
||||
* Append a range of bytes to the end of the given text
|
||||
* Append a range of bytes to the end of the given text.
|
||||
*
|
||||
* @param utf8 the data to copy from
|
||||
* @param start the first position to append from utf8
|
||||
* @param len the number of bytes to append
|
||||
*/
|
||||
public void append(byte[] utf8, int start, int len) {
|
||||
setCapacity(length + len, true);
|
||||
byte[] original = bytes;
|
||||
int capacity = Math.max(length + len, length + (length >> 1));
|
||||
if (ensureCapacity(capacity)) {
|
||||
System.arraycopy(original, 0, bytes, 0, length);
|
||||
}
|
||||
System.arraycopy(utf8, start, bytes, length, len);
|
||||
length += len;
|
||||
}
|
||||
@ -250,41 +265,33 @@ public void clear() {
|
||||
length = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
/**
|
||||
* Sets the capacity of this Text object to <em>at least</em>
|
||||
* <code>len</code> bytes. If the current buffer is longer,
|
||||
* then the capacity and existing content of the buffer are
|
||||
* unchanged. If <code>len</code> is larger
|
||||
* than the current capacity, the Text object's capacity is
|
||||
* increased to match.
|
||||
* @param len the number of bytes we need
|
||||
* @param keepData should the old data be kept
|
||||
* <code>capacity</code> bytes. If the current buffer is longer, then the
|
||||
* capacity and existing content of the buffer are unchanged. If
|
||||
* <code>capacity</code> is larger than the current capacity, the Text
|
||||
* object's capacity is increased to match and any existing data is lost.
|
||||
*
|
||||
* @param capacity the number of bytes we need
|
||||
* @return true if the internal array was resized or false otherwise
|
||||
*/
|
||||
private void setCapacity(int len, boolean keepData) {
|
||||
if (bytes == null || bytes.length < len) {
|
||||
if (bytes != null && keepData) {
|
||||
bytes = Arrays.copyOf(bytes, Math.max(len,length << 1));
|
||||
} else {
|
||||
bytes = new byte[len];
|
||||
}
|
||||
private boolean ensureCapacity(final int capacity) {
|
||||
if (bytes.length < capacity) {
|
||||
bytes = new byte[capacity];
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert text back to string
|
||||
* @see java.lang.Object#toString()
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
try {
|
||||
return decode(bytes, 0, length);
|
||||
} catch (CharacterCodingException e) {
|
||||
throw new RuntimeException("Should not have happened " , e);
|
||||
throw new RuntimeException("Should not have happened", e);
|
||||
}
|
||||
}
|
||||
|
||||
/** deserialize
|
||||
*/
|
||||
@Override
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
int newLength = WritableUtils.readVInt(in);
|
||||
@ -303,7 +310,9 @@ public void readFields(DataInput in, int maxLength) throws IOException {
|
||||
readWithKnownLength(in, newLength);
|
||||
}
|
||||
|
||||
/** Skips over one Text in the input. */
|
||||
/**
|
||||
* Skips over one Text in the input.
|
||||
*/
|
||||
public static void skip(DataInput in) throws IOException {
|
||||
int length = WritableUtils.readVInt(in);
|
||||
WritableUtils.skipFully(in, length);
|
||||
@ -315,14 +324,14 @@ public static void skip(DataInput in) throws IOException {
|
||||
* format.
|
||||
*/
|
||||
public void readWithKnownLength(DataInput in, int len) throws IOException {
|
||||
setCapacity(len, false);
|
||||
ensureCapacity(len);
|
||||
in.readFully(bytes, 0, len);
|
||||
length = len;
|
||||
}
|
||||
|
||||
/** serialize
|
||||
* write this object to out
|
||||
* length uses zero-compressed encoding
|
||||
/**
|
||||
* Serialize. Write this object to out. The length uses zero-compressed encoding.
|
||||
*
|
||||
* @see Writable#write(DataOutput)
|
||||
*/
|
||||
@Override
|
||||
@ -341,7 +350,10 @@ public void write(DataOutput out, int maxLength) throws IOException {
|
||||
out.write(bytes, 0, length);
|
||||
}
|
||||
|
||||
/** Returns true iff <code>o</code> is a Text with the same contents. */
|
||||
/**
|
||||
* Returns true iff <code>o</code> is a Text with the same length and same
|
||||
* contents.
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (o instanceof Text)
|
||||
@ -365,7 +377,7 @@ public int compare(byte[] b1, int s1, int l1,
|
||||
byte[] b2, int s2, int l2) {
|
||||
int n1 = WritableUtils.decodeVIntSize(b1[s1]);
|
||||
int n2 = WritableUtils.decodeVIntSize(b2[s2]);
|
||||
return compareBytes(b1, s1+n1, l1-n1, b2, s2+n2, l2-n2);
|
||||
return compareBytes(b1, s1 + n1, l1 - n1, b2, s2 + n2, l2 - n2);
|
||||
}
|
||||
}
|
||||
|
||||
@ -474,7 +486,8 @@ public static String readString(DataInput in, int maxLength)
|
||||
return decode(bytes);
|
||||
}
|
||||
|
||||
/** Write a UTF8 encoded string to out
|
||||
/**
|
||||
* Write a UTF8 encoded string to out.
|
||||
*/
|
||||
public static int writeString(DataOutput out, String s) throws IOException {
|
||||
ByteBuffer bytes = encode(s);
|
||||
@ -484,7 +497,8 @@ public static int writeString(DataOutput out, String s) throws IOException {
|
||||
return length;
|
||||
}
|
||||
|
||||
/** Write a UTF8 encoded string with a maximum size to out
|
||||
/**
|
||||
* Write a UTF8 encoded string with a maximum size to out.
|
||||
*/
|
||||
public static int writeString(DataOutput out, String s, int maxLength)
|
||||
throws IOException {
|
||||
@ -509,16 +523,18 @@ public static int writeString(DataOutput out, String s, int maxLength)
|
||||
private static final int TRAIL_BYTE = 2;
|
||||
|
||||
/**
|
||||
* Check if a byte array contains valid utf-8
|
||||
* Check if a byte array contains valid UTF-8.
|
||||
*
|
||||
* @param utf8 byte array
|
||||
* @throws MalformedInputException if the byte array contains invalid utf-8
|
||||
* @throws MalformedInputException if the byte array contains invalid UTF-8
|
||||
*/
|
||||
public static void validateUTF8(byte[] utf8) throws MalformedInputException {
|
||||
validateUTF8(utf8, 0, utf8.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check to see if a byte array is valid utf-8
|
||||
* Check to see if a byte array is valid UTF-8.
|
||||
*
|
||||
* @param utf8 the array of bytes
|
||||
* @param start the offset of the first byte in the array
|
||||
* @param len the length of the byte sequence
|
||||
@ -641,7 +657,6 @@ public static int bytesToCodePoint(ByteBuffer bytes) {
|
||||
return ch;
|
||||
}
|
||||
|
||||
|
||||
static final int offsetsFromUTF8[] =
|
||||
{ 0x00000000, 0x00003080,
|
||||
0x000E2080, 0x03C82080, 0xFA082080, 0x82082080 };
|
||||
|
@ -291,9 +291,9 @@ public void testTextText() throws CharacterCodingException {
|
||||
a.append("xdefgxxx".getBytes(), 1, 4);
|
||||
assertEquals("modified aliased string", "abc", b.toString());
|
||||
assertEquals("appended string incorrectly", "abcdefg", a.toString());
|
||||
// add an extra byte so that capacity = 14 and length = 8
|
||||
// add an extra byte so that capacity = 10 and length = 8
|
||||
a.append(new byte[]{'d'}, 0, 1);
|
||||
assertEquals(14, a.getBytes().length);
|
||||
assertEquals(10, a.getBytes().length);
|
||||
assertEquals(8, a.copyBytes().length);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user