HADOOP-16951: Tidy up the Text and BytesWritable classes.
1. Remove superfluous code. 2. Remove superfluous comments. 3. Apply Checkstyle fixes. 4. Remove methods that simply call super.method(). 5. Use Java 8 facilities to streamline code where applicable. 6. Simplify and unify some of the constructs between the two classes. 7. Expand the backing arrays by 1.5x instead of 2x per expansion.
This commit is contained in:
parent
2fe122e322
commit
eca05917d6
@ -19,6 +19,9 @@
|
|||||||
package org.apache.hadoop.io;
|
package org.apache.hadoop.io;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.IntStream;
|
||||||
import java.io.DataInput;
|
import java.io.DataInput;
|
||||||
import java.io.DataOutput;
|
import java.io.DataOutput;
|
||||||
|
|
||||||
@ -36,16 +39,20 @@
|
|||||||
public class BytesWritable extends BinaryComparable
|
public class BytesWritable extends BinaryComparable
|
||||||
implements WritableComparable<BinaryComparable> {
|
implements WritableComparable<BinaryComparable> {
|
||||||
private static final int LENGTH_BYTES = 4;
|
private static final int LENGTH_BYTES = 4;
|
||||||
private static final byte[] EMPTY_BYTES = {};
|
|
||||||
|
private static final byte[] EMPTY_BYTES = new byte[0];
|
||||||
|
|
||||||
private int size;
|
private int size;
|
||||||
private byte[] bytes;
|
private byte[] bytes;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a zero-size sequence.
|
* Create a zero-size sequence.
|
||||||
*/
|
*/
|
||||||
public BytesWritable() {this(EMPTY_BYTES);}
|
public BytesWritable() {
|
||||||
|
this.bytes = EMPTY_BYTES;
|
||||||
|
this.size = 0;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a BytesWritable using the byte array as the initial value.
|
* Create a BytesWritable using the byte array as the initial value.
|
||||||
* @param bytes This array becomes the backing storage for the object.
|
* @param bytes This array becomes the backing storage for the object.
|
||||||
@ -65,17 +72,15 @@ public BytesWritable(byte[] bytes, int length) {
|
|||||||
this.bytes = bytes;
|
this.bytes = bytes;
|
||||||
this.size = length;
|
this.size = length;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get a copy of the bytes that is exactly the length of the data.
|
* Get a copy of the bytes that is exactly the length of the data.
|
||||||
* See {@link #getBytes()} for faster access to the underlying array.
|
* See {@link #getBytes()} for faster access to the underlying array.
|
||||||
*/
|
*/
|
||||||
public byte[] copyBytes() {
|
public byte[] copyBytes() {
|
||||||
byte[] result = new byte[size];
|
return Arrays.copyOf(bytes, size);
|
||||||
System.arraycopy(bytes, 0, result, 0, size);
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the data backing the BytesWritable. Please use {@link #copyBytes()}
|
* Get the data backing the BytesWritable. Please use {@link #copyBytes()}
|
||||||
* if you need the returned array to be precisely the length of the data.
|
* if you need the returned array to be precisely the length of the data.
|
||||||
@ -111,7 +116,7 @@ public int getLength() {
|
|||||||
public int getSize() {
|
public int getSize() {
|
||||||
return getLength();
|
return getLength();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Change the size of the buffer. The values in the old range are preserved
|
* Change the size of the buffer. The values in the old range are preserved
|
||||||
* and any new values are undefined. The capacity is changed if it is
|
* and any new values are undefined. The capacity is changed if it is
|
||||||
@ -126,36 +131,32 @@ public void setSize(int size) {
|
|||||||
}
|
}
|
||||||
this.size = size;
|
this.size = size;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the capacity, which is the maximum size that could be handled without
|
* Get the capacity, which is the maximum size that could be handled without
|
||||||
* resizing the backing storage.
|
* resizing the backing storage.
|
||||||
|
*
|
||||||
* @return The number of bytes
|
* @return The number of bytes
|
||||||
*/
|
*/
|
||||||
public int getCapacity() {
|
public int getCapacity() {
|
||||||
return bytes.length;
|
return bytes.length;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Change the capacity of the backing storage.
|
* Change the capacity of the backing storage. The data is preserved.
|
||||||
* The data is preserved.
|
*
|
||||||
* @param new_cap The new capacity in bytes.
|
* @param capacity The new capacity in bytes.
|
||||||
*/
|
*/
|
||||||
public void setCapacity(int new_cap) {
|
public void setCapacity(final int capacity) {
|
||||||
if (new_cap != getCapacity()) {
|
if (capacity != getCapacity()) {
|
||||||
byte[] new_data = new byte[new_cap];
|
this.size = Math.min(size, capacity);
|
||||||
if (new_cap < size) {
|
this.bytes = Arrays.copyOf(this.bytes, capacity);
|
||||||
size = new_cap;
|
|
||||||
}
|
|
||||||
if (size != 0) {
|
|
||||||
System.arraycopy(bytes, 0, new_data, 0, size);
|
|
||||||
}
|
|
||||||
bytes = new_data;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the BytesWritable to the contents of the given newData.
|
* Set the BytesWritable to the contents of the given newData.
|
||||||
|
*
|
||||||
* @param newData the value to set this BytesWritable to.
|
* @param newData the value to set this BytesWritable to.
|
||||||
*/
|
*/
|
||||||
public void set(BytesWritable newData) {
|
public void set(BytesWritable newData) {
|
||||||
@ -163,7 +164,8 @@ public void set(BytesWritable newData) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the value to a copy of the given byte range
|
* Set the value to a copy of the given byte range.
|
||||||
|
*
|
||||||
* @param newData the new values to copy in
|
* @param newData the new values to copy in
|
||||||
* @param offset the offset in newData to start at
|
* @param offset the offset in newData to start at
|
||||||
* @param length the number of bytes to copy
|
* @param length the number of bytes to copy
|
||||||
@ -174,25 +176,18 @@ public void set(byte[] newData, int offset, int length) {
|
|||||||
System.arraycopy(newData, offset, bytes, 0, size);
|
System.arraycopy(newData, offset, bytes, 0, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
// inherit javadoc
|
|
||||||
@Override
|
@Override
|
||||||
public void readFields(DataInput in) throws IOException {
|
public void readFields(DataInput in) throws IOException {
|
||||||
setSize(0); // clear the old data
|
setSize(0); // clear the old data
|
||||||
setSize(in.readInt());
|
setSize(in.readInt());
|
||||||
in.readFully(bytes, 0, size);
|
in.readFully(bytes, 0, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
// inherit javadoc
|
|
||||||
@Override
|
@Override
|
||||||
public void write(DataOutput out) throws IOException {
|
public void write(DataOutput out) throws IOException {
|
||||||
out.writeInt(size);
|
out.writeInt(size);
|
||||||
out.write(bytes, 0, size);
|
out.write(bytes, 0, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public int hashCode() {
|
|
||||||
return super.hashCode();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Are the two byte sequences equal?
|
* Are the two byte sequences equal?
|
||||||
@ -204,25 +199,19 @@ public boolean equals(Object right_obj) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return super.hashCode();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate the stream of bytes as hex pairs separated by ' '.
|
* Generate the stream of bytes as hex pairs separated by ' '.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
StringBuilder sb = new StringBuilder(3*size);
|
return IntStream.range(0, size)
|
||||||
for (int idx = 0; idx < size; idx++) {
|
.mapToObj(idx -> String.format("%02x", bytes[idx]))
|
||||||
// if not the first, put a blank separator in
|
.collect(Collectors.joining(" "));
|
||||||
if (idx != 0) {
|
|
||||||
sb.append(' ');
|
|
||||||
}
|
|
||||||
String num = Integer.toHexString(0xff & bytes[idx]);
|
|
||||||
// if it is only one digit, add a leading 0.
|
|
||||||
if (num.length() < 2) {
|
|
||||||
sb.append('0');
|
|
||||||
}
|
|
||||||
sb.append(num);
|
|
||||||
}
|
|
||||||
return sb.toString();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** A Comparator optimized for BytesWritable. */
|
/** A Comparator optimized for BytesWritable. */
|
||||||
@ -230,20 +219,20 @@ public static class Comparator extends WritableComparator {
|
|||||||
public Comparator() {
|
public Comparator() {
|
||||||
super(BytesWritable.class);
|
super(BytesWritable.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compare the buffers in serialized form.
|
* Compare the buffers in serialized form.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public int compare(byte[] b1, int s1, int l1,
|
public int compare(byte[] b1, int s1, int l1,
|
||||||
byte[] b2, int s2, int l2) {
|
byte[] b2, int s2, int l2) {
|
||||||
return compareBytes(b1, s1+LENGTH_BYTES, l1-LENGTH_BYTES,
|
return compareBytes(b1, s1 + LENGTH_BYTES, l1 - LENGTH_BYTES,
|
||||||
b2, s2+LENGTH_BYTES, l2-LENGTH_BYTES);
|
b2, s2 + LENGTH_BYTES, l2 - LENGTH_BYTES);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static { // register this comparator
|
static { // register this comparator
|
||||||
WritableComparator.define(BytesWritable.class, new Comparator());
|
WritableComparator.define(BytesWritable.class, new Comparator());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -24,11 +24,11 @@
|
|||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.CharBuffer;
|
import java.nio.CharBuffer;
|
||||||
import java.nio.charset.CharacterCodingException;
|
import java.nio.charset.CharacterCodingException;
|
||||||
import java.nio.charset.Charset;
|
|
||||||
import java.nio.charset.CharsetDecoder;
|
import java.nio.charset.CharsetDecoder;
|
||||||
import java.nio.charset.CharsetEncoder;
|
import java.nio.charset.CharsetEncoder;
|
||||||
import java.nio.charset.CodingErrorAction;
|
import java.nio.charset.CodingErrorAction;
|
||||||
import java.nio.charset.MalformedInputException;
|
import java.nio.charset.MalformedInputException;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.text.CharacterIterator;
|
import java.text.CharacterIterator;
|
||||||
import java.text.StringCharacterIterator;
|
import java.text.StringCharacterIterator;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
@ -52,63 +52,67 @@
|
|||||||
@InterfaceStability.Stable
|
@InterfaceStability.Stable
|
||||||
public class Text extends BinaryComparable
|
public class Text extends BinaryComparable
|
||||||
implements WritableComparable<BinaryComparable> {
|
implements WritableComparable<BinaryComparable> {
|
||||||
|
|
||||||
private static final ThreadLocal<CharsetEncoder> ENCODER_FACTORY =
|
private static final ThreadLocal<CharsetEncoder> ENCODER_FACTORY =
|
||||||
new ThreadLocal<CharsetEncoder>() {
|
new ThreadLocal<CharsetEncoder>() {
|
||||||
@Override
|
@Override
|
||||||
protected CharsetEncoder initialValue() {
|
protected CharsetEncoder initialValue() {
|
||||||
return Charset.forName("UTF-8").newEncoder().
|
return StandardCharsets.UTF_8.newEncoder().
|
||||||
onMalformedInput(CodingErrorAction.REPORT).
|
onMalformedInput(CodingErrorAction.REPORT).
|
||||||
onUnmappableCharacter(CodingErrorAction.REPORT);
|
onUnmappableCharacter(CodingErrorAction.REPORT);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
private static final ThreadLocal<CharsetDecoder> DECODER_FACTORY =
|
private static final ThreadLocal<CharsetDecoder> DECODER_FACTORY =
|
||||||
new ThreadLocal<CharsetDecoder>() {
|
new ThreadLocal<CharsetDecoder>() {
|
||||||
@Override
|
@Override
|
||||||
protected CharsetDecoder initialValue() {
|
protected CharsetDecoder initialValue() {
|
||||||
return Charset.forName("UTF-8").newDecoder().
|
return StandardCharsets.UTF_8.newDecoder().
|
||||||
onMalformedInput(CodingErrorAction.REPORT).
|
onMalformedInput(CodingErrorAction.REPORT).
|
||||||
onUnmappableCharacter(CodingErrorAction.REPORT);
|
onUnmappableCharacter(CodingErrorAction.REPORT);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
private static final byte [] EMPTY_BYTES = new byte[0];
|
|
||||||
|
|
||||||
private byte[] bytes;
|
|
||||||
private int length;
|
|
||||||
|
|
||||||
|
private static final byte[] EMPTY_BYTES = new byte[0];
|
||||||
|
|
||||||
|
private byte[] bytes = EMPTY_BYTES;
|
||||||
|
private int length = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct an empty text string.
|
||||||
|
*/
|
||||||
public Text() {
|
public Text() {
|
||||||
bytes = EMPTY_BYTES;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Construct from a string.
|
/**
|
||||||
|
* Construct from a string.
|
||||||
*/
|
*/
|
||||||
public Text(String string) {
|
public Text(String string) {
|
||||||
set(string);
|
set(string);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Construct from another text. */
|
/**
|
||||||
|
* Construct from another text.
|
||||||
|
*/
|
||||||
public Text(Text utf8) {
|
public Text(Text utf8) {
|
||||||
set(utf8);
|
set(utf8);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Construct from a byte array.
|
/**
|
||||||
|
* Construct from a byte array.
|
||||||
*/
|
*/
|
||||||
public Text(byte[] utf8) {
|
public Text(byte[] utf8) {
|
||||||
set(utf8);
|
set(utf8);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get a copy of the bytes that is exactly the length of the data.
|
* Get a copy of the bytes that is exactly the length of the data.
|
||||||
* See {@link #getBytes()} for faster access to the underlying array.
|
* See {@link #getBytes()} for faster access to the underlying array.
|
||||||
*/
|
*/
|
||||||
public byte[] copyBytes() {
|
public byte[] copyBytes() {
|
||||||
byte[] result = new byte[length];
|
return Arrays.copyOf(bytes, length);
|
||||||
System.arraycopy(bytes, 0, result, 0, length);
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the raw bytes; however, only data up to {@link #getLength()} is
|
* Returns the raw bytes; however, only data up to {@link #getLength()} is
|
||||||
* valid. Please use {@link #copyBytes()} if you
|
* valid. Please use {@link #copyBytes()} if you
|
||||||
@ -119,12 +123,14 @@ public byte[] getBytes() {
|
|||||||
return bytes;
|
return bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns the number of bytes in the byte array */
|
/**
|
||||||
|
* Returns the number of bytes in the byte array.
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public int getLength() {
|
public int getLength() {
|
||||||
return length;
|
return length;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the Unicode Scalar Value (32-bit integer value)
|
* Returns the Unicode Scalar Value (32-bit integer value)
|
||||||
* for the character at <code>position</code>. Note that this
|
* for the character at <code>position</code>. Note that this
|
||||||
@ -136,15 +142,15 @@ public int getLength() {
|
|||||||
public int charAt(int position) {
|
public int charAt(int position) {
|
||||||
if (position > this.length) return -1; // too long
|
if (position > this.length) return -1; // too long
|
||||||
if (position < 0) return -1; // duh.
|
if (position < 0) return -1; // duh.
|
||||||
|
|
||||||
ByteBuffer bb = (ByteBuffer)ByteBuffer.wrap(bytes).position(position);
|
ByteBuffer bb = (ByteBuffer)ByteBuffer.wrap(bytes).position(position);
|
||||||
return bytesToCodePoint(bb.slice());
|
return bytesToCodePoint(bb.slice());
|
||||||
}
|
}
|
||||||
|
|
||||||
public int find(String what) {
|
public int find(String what) {
|
||||||
return find(what, 0);
|
return find(what, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Finds any occurrence of <code>what</code> in the backing
|
* Finds any occurrence of <code>what</code> in the backing
|
||||||
* buffer, starting at position <code>start</code>. The starting
|
* buffer, starting at position <code>start</code>. The starting
|
||||||
@ -156,11 +162,11 @@ public int find(String what) {
|
|||||||
*/
|
*/
|
||||||
public int find(String what, int start) {
|
public int find(String what, int start) {
|
||||||
try {
|
try {
|
||||||
ByteBuffer src = ByteBuffer.wrap(this.bytes,0,this.length);
|
ByteBuffer src = ByteBuffer.wrap(this.bytes, 0, this.length);
|
||||||
ByteBuffer tgt = encode(what);
|
ByteBuffer tgt = encode(what);
|
||||||
byte b = tgt.get();
|
byte b = tgt.get();
|
||||||
src.position(start);
|
src.position(start);
|
||||||
|
|
||||||
while (src.hasRemaining()) {
|
while (src.hasRemaining()) {
|
||||||
if (b == src.get()) { // matching first byte
|
if (b == src.get()) { // matching first byte
|
||||||
src.mark(); // save position in loop
|
src.mark(); // save position in loop
|
||||||
@ -186,54 +192,63 @@ public int find(String what, int start) {
|
|||||||
}
|
}
|
||||||
return -1; // not found
|
return -1; // not found
|
||||||
} catch (CharacterCodingException e) {
|
} catch (CharacterCodingException e) {
|
||||||
// can't get here
|
throw new RuntimeException("Should not have happened", e);
|
||||||
e.printStackTrace();
|
|
||||||
return -1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/** Set to contain the contents of a string.
|
|
||||||
|
/**
|
||||||
|
* Set to contain the contents of a string.
|
||||||
*/
|
*/
|
||||||
public void set(String string) {
|
public void set(String string) {
|
||||||
try {
|
try {
|
||||||
ByteBuffer bb = encode(string, true);
|
ByteBuffer bb = encode(string, true);
|
||||||
bytes = bb.array();
|
bytes = bb.array();
|
||||||
length = bb.limit();
|
length = bb.limit();
|
||||||
}catch(CharacterCodingException e) {
|
} catch (CharacterCodingException e) {
|
||||||
throw new RuntimeException("Should not have happened ", e);
|
throw new RuntimeException("Should not have happened", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Set to a utf8 byte array
|
/**
|
||||||
|
* Set to a utf8 byte array.
|
||||||
*/
|
*/
|
||||||
public void set(byte[] utf8) {
|
public void set(byte[] utf8) {
|
||||||
set(utf8, 0, utf8.length);
|
set(utf8, 0, utf8.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** copy a text. */
|
/**
|
||||||
|
* Copy a text.
|
||||||
|
*/
|
||||||
public void set(Text other) {
|
public void set(Text other) {
|
||||||
set(other.getBytes(), 0, other.getLength());
|
set(other.getBytes(), 0, other.getLength());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the Text to range of bytes
|
* Set the Text to range of bytes.
|
||||||
|
*
|
||||||
* @param utf8 the data to copy from
|
* @param utf8 the data to copy from
|
||||||
* @param start the first position of the new string
|
* @param start the first position of the new string
|
||||||
* @param len the number of bytes of the new string
|
* @param len the number of bytes of the new string
|
||||||
*/
|
*/
|
||||||
public void set(byte[] utf8, int start, int len) {
|
public void set(byte[] utf8, int start, int len) {
|
||||||
setCapacity(len, false);
|
ensureCapacity(len);
|
||||||
System.arraycopy(utf8, start, bytes, 0, len);
|
System.arraycopy(utf8, start, bytes, 0, len);
|
||||||
this.length = len;
|
this.length = len;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Append a range of bytes to the end of the given text
|
* Append a range of bytes to the end of the given text.
|
||||||
|
*
|
||||||
* @param utf8 the data to copy from
|
* @param utf8 the data to copy from
|
||||||
* @param start the first position to append from utf8
|
* @param start the first position to append from utf8
|
||||||
* @param len the number of bytes to append
|
* @param len the number of bytes to append
|
||||||
*/
|
*/
|
||||||
public void append(byte[] utf8, int start, int len) {
|
public void append(byte[] utf8, int start, int len) {
|
||||||
setCapacity(length + len, true);
|
byte[] original = bytes;
|
||||||
|
int capacity = Math.max(length + len, length + (length >> 1));
|
||||||
|
if (ensureCapacity(capacity)) {
|
||||||
|
System.arraycopy(original, 0, bytes, 0, length);
|
||||||
|
}
|
||||||
System.arraycopy(utf8, start, bytes, length, len);
|
System.arraycopy(utf8, start, bytes, length, len);
|
||||||
length += len;
|
length += len;
|
||||||
}
|
}
|
||||||
@ -250,47 +265,39 @@ public void clear() {
|
|||||||
length = 0;
|
length = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/**
|
||||||
* Sets the capacity of this Text object to <em>at least</em>
|
* Sets the capacity of this Text object to <em>at least</em>
|
||||||
* <code>len</code> bytes. If the current buffer is longer,
|
* <code>capacity</code> bytes. If the current buffer is longer, then the
|
||||||
* then the capacity and existing content of the buffer are
|
* capacity and existing content of the buffer are unchanged. If
|
||||||
* unchanged. If <code>len</code> is larger
|
* <code>capacity</code> is larger than the current capacity, the Text
|
||||||
* than the current capacity, the Text object's capacity is
|
* object's capacity is increased to match and any existing data is lost.
|
||||||
* increased to match.
|
*
|
||||||
* @param len the number of bytes we need
|
* @param capacity the number of bytes we need
|
||||||
* @param keepData should the old data be kept
|
* @return true if the internal array was resized or false otherwise
|
||||||
*/
|
*/
|
||||||
private void setCapacity(int len, boolean keepData) {
|
private boolean ensureCapacity(final int capacity) {
|
||||||
if (bytes == null || bytes.length < len) {
|
if (bytes.length < capacity) {
|
||||||
if (bytes != null && keepData) {
|
bytes = new byte[capacity];
|
||||||
bytes = Arrays.copyOf(bytes, Math.max(len,length << 1));
|
return true;
|
||||||
} else {
|
|
||||||
bytes = new byte[len];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Convert text back to string
|
|
||||||
* @see java.lang.Object#toString()
|
|
||||||
*/
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
try {
|
try {
|
||||||
return decode(bytes, 0, length);
|
return decode(bytes, 0, length);
|
||||||
} catch (CharacterCodingException e) {
|
} catch (CharacterCodingException e) {
|
||||||
throw new RuntimeException("Should not have happened " , e);
|
throw new RuntimeException("Should not have happened", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** deserialize
|
|
||||||
*/
|
|
||||||
@Override
|
@Override
|
||||||
public void readFields(DataInput in) throws IOException {
|
public void readFields(DataInput in) throws IOException {
|
||||||
int newLength = WritableUtils.readVInt(in);
|
int newLength = WritableUtils.readVInt(in);
|
||||||
readWithKnownLength(in, newLength);
|
readWithKnownLength(in, newLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void readFields(DataInput in, int maxLength) throws IOException {
|
public void readFields(DataInput in, int maxLength) throws IOException {
|
||||||
int newLength = WritableUtils.readVInt(in);
|
int newLength = WritableUtils.readVInt(in);
|
||||||
if (newLength < 0) {
|
if (newLength < 0) {
|
||||||
@ -303,7 +310,9 @@ public void readFields(DataInput in, int maxLength) throws IOException {
|
|||||||
readWithKnownLength(in, newLength);
|
readWithKnownLength(in, newLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Skips over one Text in the input. */
|
/**
|
||||||
|
* Skips over one Text in the input.
|
||||||
|
*/
|
||||||
public static void skip(DataInput in) throws IOException {
|
public static void skip(DataInput in) throws IOException {
|
||||||
int length = WritableUtils.readVInt(in);
|
int length = WritableUtils.readVInt(in);
|
||||||
WritableUtils.skipFully(in, length);
|
WritableUtils.skipFully(in, length);
|
||||||
@ -315,14 +324,14 @@ public static void skip(DataInput in) throws IOException {
|
|||||||
* format.
|
* format.
|
||||||
*/
|
*/
|
||||||
public void readWithKnownLength(DataInput in, int len) throws IOException {
|
public void readWithKnownLength(DataInput in, int len) throws IOException {
|
||||||
setCapacity(len, false);
|
ensureCapacity(len);
|
||||||
in.readFully(bytes, 0, len);
|
in.readFully(bytes, 0, len);
|
||||||
length = len;
|
length = len;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** serialize
|
/**
|
||||||
* write this object to out
|
* Serialize. Write this object to out; the length uses zero-compressed encoding.
|
||||||
* length uses zero-compressed encoding
|
*
|
||||||
* @see Writable#write(DataOutput)
|
* @see Writable#write(DataOutput)
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
@ -341,7 +350,10 @@ public void write(DataOutput out, int maxLength) throws IOException {
|
|||||||
out.write(bytes, 0, length);
|
out.write(bytes, 0, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns true iff <code>o</code> is a Text with the same contents. */
|
/**
|
||||||
|
* Returns true iff <code>o</code> is a Text with the same length and same
|
||||||
|
* contents.
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(Object o) {
|
public boolean equals(Object o) {
|
||||||
if (o instanceof Text)
|
if (o instanceof Text)
|
||||||
@ -365,7 +377,7 @@ public int compare(byte[] b1, int s1, int l1,
|
|||||||
byte[] b2, int s2, int l2) {
|
byte[] b2, int s2, int l2) {
|
||||||
int n1 = WritableUtils.decodeVIntSize(b1[s1]);
|
int n1 = WritableUtils.decodeVIntSize(b1[s1]);
|
||||||
int n2 = WritableUtils.decodeVIntSize(b2[s2]);
|
int n2 = WritableUtils.decodeVIntSize(b2[s2]);
|
||||||
return compareBytes(b1, s1+n1, l1-n1, b2, s2+n2, l2-n2);
|
return compareBytes(b1, s1 + n1, l1 - n1, b2, s2 + n2, l2 - n2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -383,12 +395,12 @@ public int compare(byte[] b1, int s1, int l1,
|
|||||||
public static String decode(byte[] utf8) throws CharacterCodingException {
|
public static String decode(byte[] utf8) throws CharacterCodingException {
|
||||||
return decode(ByteBuffer.wrap(utf8), true);
|
return decode(ByteBuffer.wrap(utf8), true);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String decode(byte[] utf8, int start, int length)
|
public static String decode(byte[] utf8, int start, int length)
|
||||||
throws CharacterCodingException {
|
throws CharacterCodingException {
|
||||||
return decode(ByteBuffer.wrap(utf8, start, length), true);
|
return decode(ByteBuffer.wrap(utf8, start, length), true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts the provided byte array to a String using the
|
* Converts the provided byte array to a String using the
|
||||||
* UTF-8 encoding. If <code>replace</code> is true, then
|
* UTF-8 encoding. If <code>replace</code> is true, then
|
||||||
@ -400,7 +412,7 @@ public static String decode(byte[] utf8, int start, int length, boolean replace)
|
|||||||
throws CharacterCodingException {
|
throws CharacterCodingException {
|
||||||
return decode(ByteBuffer.wrap(utf8, start, length), replace);
|
return decode(ByteBuffer.wrap(utf8, start, length), replace);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String decode(ByteBuffer utf8, boolean replace)
|
private static String decode(ByteBuffer utf8, boolean replace)
|
||||||
throws CharacterCodingException {
|
throws CharacterCodingException {
|
||||||
CharsetDecoder decoder = DECODER_FACTORY.get();
|
CharsetDecoder decoder = DECODER_FACTORY.get();
|
||||||
@ -463,7 +475,7 @@ public static ByteBuffer encode(String string, boolean replace)
|
|||||||
public static String readString(DataInput in) throws IOException {
|
public static String readString(DataInput in) throws IOException {
|
||||||
return readString(in, Integer.MAX_VALUE);
|
return readString(in, Integer.MAX_VALUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Read a UTF8 encoded string with a maximum size
|
/** Read a UTF8 encoded string with a maximum size
|
||||||
*/
|
*/
|
||||||
public static String readString(DataInput in, int maxLength)
|
public static String readString(DataInput in, int maxLength)
|
||||||
@ -473,8 +485,9 @@ public static String readString(DataInput in, int maxLength)
|
|||||||
in.readFully(bytes, 0, length);
|
in.readFully(bytes, 0, length);
|
||||||
return decode(bytes);
|
return decode(bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Write a UTF8 encoded string to out
|
/**
|
||||||
|
* Write a UTF8 encoded string to out.
|
||||||
*/
|
*/
|
||||||
public static int writeString(DataOutput out, String s) throws IOException {
|
public static int writeString(DataOutput out, String s) throws IOException {
|
||||||
ByteBuffer bytes = encode(s);
|
ByteBuffer bytes = encode(s);
|
||||||
@ -484,7 +497,8 @@ public static int writeString(DataOutput out, String s) throws IOException {
|
|||||||
return length;
|
return length;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Write a UTF8 encoded string with a maximum size to out
|
/**
|
||||||
|
* Write a UTF8 encoded string with a maximum size to out.
|
||||||
*/
|
*/
|
||||||
public static int writeString(DataOutput out, String s, int maxLength)
|
public static int writeString(DataOutput out, String s, int maxLength)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
@ -501,24 +515,26 @@ public static int writeString(DataOutput out, String s, int maxLength)
|
|||||||
}
|
}
|
||||||
|
|
||||||
////// states for validateUTF8
|
////// states for validateUTF8
|
||||||
|
|
||||||
private static final int LEAD_BYTE = 0;
|
private static final int LEAD_BYTE = 0;
|
||||||
|
|
||||||
private static final int TRAIL_BYTE_1 = 1;
|
private static final int TRAIL_BYTE_1 = 1;
|
||||||
|
|
||||||
private static final int TRAIL_BYTE = 2;
|
private static final int TRAIL_BYTE = 2;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if a byte array contains valid utf-8
|
* Check if a byte array contains valid UTF-8.
|
||||||
|
*
|
||||||
* @param utf8 byte array
|
* @param utf8 byte array
|
||||||
* @throws MalformedInputException if the byte array contains invalid utf-8
|
* @throws MalformedInputException if the byte array contains invalid UTF-8
|
||||||
*/
|
*/
|
||||||
public static void validateUTF8(byte[] utf8) throws MalformedInputException {
|
public static void validateUTF8(byte[] utf8) throws MalformedInputException {
|
||||||
validateUTF8(utf8, 0, utf8.length);
|
validateUTF8(utf8, 0, utf8.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check to see if a byte array is valid utf-8
|
* Check to see if a byte array is valid UTF-8.
|
||||||
|
*
|
||||||
* @param utf8 the array of bytes
|
* @param utf8 the array of bytes
|
||||||
* @param start the offset of the first byte in the array
|
* @param start the offset of the first byte in the array
|
||||||
* @param len the length of the byte sequence
|
* @param len the length of the byte sequence
|
||||||
@ -641,7 +657,6 @@ public static int bytesToCodePoint(ByteBuffer bytes) {
|
|||||||
return ch;
|
return ch;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static final int offsetsFromUTF8[] =
|
static final int offsetsFromUTF8[] =
|
||||||
{ 0x00000000, 0x00003080,
|
{ 0x00000000, 0x00003080,
|
||||||
0x000E2080, 0x03C82080, 0xFA082080, 0x82082080 };
|
0x000E2080, 0x03C82080, 0xFA082080, 0x82082080 };
|
||||||
|
@ -291,9 +291,9 @@ public void testTextText() throws CharacterCodingException {
|
|||||||
a.append("xdefgxxx".getBytes(), 1, 4);
|
a.append("xdefgxxx".getBytes(), 1, 4);
|
||||||
assertEquals("modified aliased string", "abc", b.toString());
|
assertEquals("modified aliased string", "abc", b.toString());
|
||||||
assertEquals("appended string incorrectly", "abcdefg", a.toString());
|
assertEquals("appended string incorrectly", "abcdefg", a.toString());
|
||||||
// add an extra byte so that capacity = 14 and length = 8
|
// add an extra byte so that capacity = 10 and length = 8
|
||||||
a.append(new byte[]{'d'}, 0, 1);
|
a.append(new byte[]{'d'}, 0, 1);
|
||||||
assertEquals(14, a.getBytes().length);
|
assertEquals(10, a.getBytes().length);
|
||||||
assertEquals(8, a.copyBytes().length);
|
assertEquals(8, a.copyBytes().length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user