HADOOP-17141. Add Capability To Get Text Length (#2157)
Contributed by David Mollitor
This commit is contained in:
parent
247eb0979b
commit
e60096c377
@ -77,6 +77,7 @@ protected CharsetDecoder initialValue() {
|
|||||||
|
|
||||||
private byte[] bytes = EMPTY_BYTES;
|
private byte[] bytes = EMPTY_BYTES;
|
||||||
private int length = 0;
|
private int length = 0;
|
||||||
|
private int textLength = -1;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Construct an empty text string.
|
* Construct an empty text string.
|
||||||
@ -131,6 +132,17 @@ public int getLength() {
|
|||||||
return length;
|
return length;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the length of this text. The length is equal to the number of
|
||||||
|
* Unicode code units in the text.
|
||||||
|
*/
|
||||||
|
public int getTextLength() {
|
||||||
|
if (textLength < 0) {
|
||||||
|
textLength = toString().length();
|
||||||
|
}
|
||||||
|
return textLength;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the Unicode Scalar Value (32-bit integer value)
|
* Returns the Unicode Scalar Value (32-bit integer value)
|
||||||
* for the character at <code>position</code>. Note that this
|
* for the character at <code>position</code>. Note that this
|
||||||
@ -204,6 +216,7 @@ public void set(String string) {
|
|||||||
ByteBuffer bb = encode(string, true);
|
ByteBuffer bb = encode(string, true);
|
||||||
bytes = bb.array();
|
bytes = bb.array();
|
||||||
length = bb.limit();
|
length = bb.limit();
|
||||||
|
textLength = string.length();
|
||||||
} catch (CharacterCodingException e) {
|
} catch (CharacterCodingException e) {
|
||||||
throw new RuntimeException("Should not have happened", e);
|
throw new RuntimeException("Should not have happened", e);
|
||||||
}
|
}
|
||||||
@ -221,6 +234,7 @@ public void set(byte[] utf8) {
|
|||||||
*/
|
*/
|
||||||
public void set(Text other) {
|
public void set(Text other) {
|
||||||
set(other.getBytes(), 0, other.getLength());
|
set(other.getBytes(), 0, other.getLength());
|
||||||
|
this.textLength = other.textLength;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -234,6 +248,7 @@ public void set(byte[] utf8, int start, int len) {
|
|||||||
ensureCapacity(len);
|
ensureCapacity(len);
|
||||||
System.arraycopy(utf8, start, bytes, 0, len);
|
System.arraycopy(utf8, start, bytes, 0, len);
|
||||||
this.length = len;
|
this.length = len;
|
||||||
|
this.textLength = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -251,6 +266,7 @@ public void append(byte[] utf8, int start, int len) {
|
|||||||
}
|
}
|
||||||
System.arraycopy(utf8, start, bytes, length, len);
|
System.arraycopy(utf8, start, bytes, length, len);
|
||||||
length += len;
|
length += len;
|
||||||
|
textLength = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -263,6 +279,7 @@ public void append(byte[] utf8, int start, int len) {
|
|||||||
*/
|
*/
|
||||||
public void clear() {
|
public void clear() {
|
||||||
length = 0;
|
length = 0;
|
||||||
|
textLength = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -327,6 +344,7 @@ public void readWithKnownLength(DataInput in, int len) throws IOException {
|
|||||||
ensureCapacity(len);
|
ensureCapacity(len);
|
||||||
in.readFully(bytes, 0, len);
|
in.readFully(bytes, 0, len);
|
||||||
length = len;
|
length = len;
|
||||||
|
textLength = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -268,6 +268,8 @@ public void testClear() throws Exception {
|
|||||||
0, text.getBytes().length);
|
0, text.getBytes().length);
|
||||||
assertEquals("String's length must be zero",
|
assertEquals("String's length must be zero",
|
||||||
0, text.getLength());
|
0, text.getLength());
|
||||||
|
assertEquals("String's text length must be zero",
|
||||||
|
0, text.getTextLength());
|
||||||
|
|
||||||
// Test if clear works as intended
|
// Test if clear works as intended
|
||||||
text = new Text("abcd\u20acbdcd\u20ac");
|
text = new Text("abcd\u20acbdcd\u20ac");
|
||||||
@ -280,6 +282,8 @@ public void testClear() throws Exception {
|
|||||||
text.getBytes().length >= len);
|
text.getBytes().length >= len);
|
||||||
assertEquals("Length of the string must be reset to 0 after clear()",
|
assertEquals("Length of the string must be reset to 0 after clear()",
|
||||||
0, text.getLength());
|
0, text.getLength());
|
||||||
|
assertEquals("Text length of the string must be reset to 0 after clear()",
|
||||||
|
0, text.getTextLength());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -288,9 +292,12 @@ public void testTextText() throws CharacterCodingException {
|
|||||||
Text b=new Text("a");
|
Text b=new Text("a");
|
||||||
b.set(a);
|
b.set(a);
|
||||||
assertEquals("abc", b.toString());
|
assertEquals("abc", b.toString());
|
||||||
|
assertEquals(3, a.getTextLength());
|
||||||
|
assertEquals(3, b.getTextLength());
|
||||||
a.append("xdefgxxx".getBytes(), 1, 4);
|
a.append("xdefgxxx".getBytes(), 1, 4);
|
||||||
assertEquals("modified aliased string", "abc", b.toString());
|
assertEquals("modified aliased string", "abc", b.toString());
|
||||||
assertEquals("appended string incorrectly", "abcdefg", a.toString());
|
assertEquals("appended string incorrectly", "abcdefg", a.toString());
|
||||||
|
assertEquals("This should reflect in the lenght", 7, a.getTextLength());
|
||||||
// add an extra byte so that capacity = 10 and length = 8
|
// add an extra byte so that capacity = 10 and length = 8
|
||||||
a.append(new byte[]{'d'}, 0, 1);
|
a.append(new byte[]{'d'}, 0, 1);
|
||||||
assertEquals(10, a.getBytes().length);
|
assertEquals(10, a.getBytes().length);
|
||||||
@ -392,16 +399,19 @@ public void testReadWithKnownLength() throws IOException {
|
|||||||
in.reset(inputBytes, inputBytes.length);
|
in.reset(inputBytes, inputBytes.length);
|
||||||
text.readWithKnownLength(in, 5);
|
text.readWithKnownLength(in, 5);
|
||||||
assertEquals("hello", text.toString());
|
assertEquals("hello", text.toString());
|
||||||
|
assertEquals(5, text.getTextLength());
|
||||||
|
|
||||||
// Read longer length, make sure it lengthens
|
// Read longer length, make sure it lengthens
|
||||||
in.reset(inputBytes, inputBytes.length);
|
in.reset(inputBytes, inputBytes.length);
|
||||||
text.readWithKnownLength(in, 7);
|
text.readWithKnownLength(in, 7);
|
||||||
assertEquals("hello w", text.toString());
|
assertEquals("hello w", text.toString());
|
||||||
|
assertEquals(7, text.getTextLength());
|
||||||
|
|
||||||
// Read shorter length, make sure it shortens
|
// Read shorter length, make sure it shortens
|
||||||
in.reset(inputBytes, inputBytes.length);
|
in.reset(inputBytes, inputBytes.length);
|
||||||
text.readWithKnownLength(in, 2);
|
text.readWithKnownLength(in, 2);
|
||||||
assertEquals("he", text.toString());
|
assertEquals("he", text.toString());
|
||||||
|
assertEquals(2, text.getTextLength());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Loading…
Reference in New Issue
Block a user