HADOOP-17141. Add Capability To Get Text Length (#2157)

Contributed by David Mollitor
This commit is contained in:
belugabehr 2020-07-24 05:37:28 -04:00 committed by GitHub
parent 247eb0979b
commit e60096c377
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 28 additions and 0 deletions

View File

@ -77,6 +77,7 @@ protected CharsetDecoder initialValue() {
private byte[] bytes = EMPTY_BYTES; private byte[] bytes = EMPTY_BYTES;
private int length = 0; private int length = 0;
private int textLength = -1;
/** /**
* Construct an empty text string. * Construct an empty text string.
@ -131,6 +132,17 @@ public int getLength() {
return length; return length;
} }
/**
 * Returns the length of this text measured in Unicode code units, i.e. the
 * value {@link String#length()} reports for {@link #toString()}.
 *
 * <p>The result is cached in {@code textLength}; a negative cached value
 * means the length has not been computed since the contents last changed,
 * in which case it is derived from the string form on this call. Note that
 * this is a character-based length and may differ from the byte-based
 * length tracked in {@code length}.
 *
 * @return the number of Unicode code units in the text
 */
public int getTextLength() {
  // Fast path: a non-negative value is a previously cached result.
  if (textLength >= 0) {
    return textLength;
  }
  // Cache miss: compute from the string form and remember it.
  textLength = toString().length();
  return textLength;
}
/** /**
* Returns the Unicode Scalar Value (32-bit integer value) * Returns the Unicode Scalar Value (32-bit integer value)
* for the character at <code>position</code>. Note that this * for the character at <code>position</code>. Note that this
@ -204,6 +216,7 @@ public void set(String string) {
ByteBuffer bb = encode(string, true); ByteBuffer bb = encode(string, true);
bytes = bb.array(); bytes = bb.array();
length = bb.limit(); length = bb.limit();
textLength = string.length();
} catch (CharacterCodingException e) { } catch (CharacterCodingException e) {
throw new RuntimeException("Should not have happened", e); throw new RuntimeException("Should not have happened", e);
} }
@ -221,6 +234,7 @@ public void set(byte[] utf8) {
*/ */
public void set(Text other) { public void set(Text other) {
set(other.getBytes(), 0, other.getLength()); set(other.getBytes(), 0, other.getLength());
this.textLength = other.textLength;
} }
/** /**
@ -234,6 +248,7 @@ public void set(byte[] utf8, int start, int len) {
ensureCapacity(len); ensureCapacity(len);
System.arraycopy(utf8, start, bytes, 0, len); System.arraycopy(utf8, start, bytes, 0, len);
this.length = len; this.length = len;
this.textLength = -1;
} }
/** /**
@ -251,6 +266,7 @@ public void append(byte[] utf8, int start, int len) {
} }
System.arraycopy(utf8, start, bytes, length, len); System.arraycopy(utf8, start, bytes, length, len);
length += len; length += len;
textLength = -1;
} }
/** /**
@ -263,6 +279,7 @@ public void append(byte[] utf8, int start, int len) {
*/ */
public void clear() { public void clear() {
length = 0; length = 0;
textLength = -1;
} }
/** /**
@ -327,6 +344,7 @@ public void readWithKnownLength(DataInput in, int len) throws IOException {
ensureCapacity(len); ensureCapacity(len);
in.readFully(bytes, 0, len); in.readFully(bytes, 0, len);
length = len; length = len;
textLength = -1;
} }
/** /**

View File

@ -268,6 +268,8 @@ public void testClear() throws Exception {
0, text.getBytes().length); 0, text.getBytes().length);
assertEquals("String's length must be zero", assertEquals("String's length must be zero",
0, text.getLength()); 0, text.getLength());
assertEquals("String's text length must be zero",
0, text.getTextLength());
// Test if clear works as intended // Test if clear works as intended
text = new Text("abcd\u20acbdcd\u20ac"); text = new Text("abcd\u20acbdcd\u20ac");
@ -280,6 +282,8 @@ public void testClear() throws Exception {
text.getBytes().length >= len); text.getBytes().length >= len);
assertEquals("Length of the string must be reset to 0 after clear()", assertEquals("Length of the string must be reset to 0 after clear()",
0, text.getLength()); 0, text.getLength());
assertEquals("Text length of the string must be reset to 0 after clear()",
0, text.getTextLength());
} }
@Test @Test
@ -288,9 +292,12 @@ public void testTextText() throws CharacterCodingException {
Text b=new Text("a"); Text b=new Text("a");
b.set(a); b.set(a);
assertEquals("abc", b.toString()); assertEquals("abc", b.toString());
assertEquals(3, a.getTextLength());
assertEquals(3, b.getTextLength());
a.append("xdefgxxx".getBytes(), 1, 4); a.append("xdefgxxx".getBytes(), 1, 4);
assertEquals("modified aliased string", "abc", b.toString()); assertEquals("modified aliased string", "abc", b.toString());
assertEquals("appended string incorrectly", "abcdefg", a.toString()); assertEquals("appended string incorrectly", "abcdefg", a.toString());
assertEquals("This should reflect in the lenght", 7, a.getTextLength());
// add an extra byte so that capacity = 10 and length = 8 // add an extra byte so that capacity = 10 and length = 8
a.append(new byte[]{'d'}, 0, 1); a.append(new byte[]{'d'}, 0, 1);
assertEquals(10, a.getBytes().length); assertEquals(10, a.getBytes().length);
@ -392,16 +399,19 @@ public void testReadWithKnownLength() throws IOException {
in.reset(inputBytes, inputBytes.length); in.reset(inputBytes, inputBytes.length);
text.readWithKnownLength(in, 5); text.readWithKnownLength(in, 5);
assertEquals("hello", text.toString()); assertEquals("hello", text.toString());
assertEquals(5, text.getTextLength());
// Read longer length, make sure it lengthens // Read longer length, make sure it lengthens
in.reset(inputBytes, inputBytes.length); in.reset(inputBytes, inputBytes.length);
text.readWithKnownLength(in, 7); text.readWithKnownLength(in, 7);
assertEquals("hello w", text.toString()); assertEquals("hello w", text.toString());
assertEquals(7, text.getTextLength());
// Read shorter length, make sure it shortens // Read shorter length, make sure it shortens
in.reset(inputBytes, inputBytes.length); in.reset(inputBytes, inputBytes.length);
text.readWithKnownLength(in, 2); text.readWithKnownLength(in, 2);
assertEquals("he", text.toString()); assertEquals("he", text.toString());
assertEquals(2, text.getTextLength());
} }
/** /**