HADOOP-6298. Add copyBytes to Text and BytesWritable. (omalley)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1050070 13f79535-47bb-0310-9956-ffa450edef68
Owen O'Malley 2010-12-16 17:43:47 +00:00
parent 19fc216767
commit 29220b99d6
5 changed files with 40 additions and 13 deletions

CHANGES.txt

@@ -211,8 +211,8 @@ Release 0.22.0 - Unreleased
     HADOOP-7009. MD5Hash provides a public factory method that creates an
     instance of thread local MessageDigest. (hairong)
 
-    HADOOP-7008. Enable test-patch.sh to have a configured number of acceptable
-    findbugs and javadoc warnings. (nigel and gkesavan)
+    HADOOP-7008. Enable test-patch.sh to have a configured number of
+    acceptable findbugs and javadoc warnings. (nigel and gkesavan)
 
     HADOOP-6818. Provides a JNI implementation of group resolution. (ddas)
@@ -227,17 +227,22 @@ Release 0.22.0 - Unreleased
     HADOOP-7024. Create a test method for adding file systems during tests.
     (Kan Zhang via jghoman)
 
-    HADOOP-6903. Make AbstractFSileSystem methods and some FileContext methods to be public. (Sanjay Radia via Sanjay Radia)
+    HADOOP-6903. Make AbstractFSileSystem methods and some FileContext methods
+    to be public. (Sanjay Radia)
 
-    HADOOP-7034. Add TestPath tests to cover dot, dot dot, and slash normalization. (eli)
+    HADOOP-7034. Add TestPath tests to cover dot, dot dot, and slash
+    normalization. (eli)
 
     HADOOP-7032. Assert type constraints in the FileStatus constructor. (eli)
 
-    HADOOP-6562. FileContextSymlinkBaseTest should use FileContextTestHelper. (eli)
+    HADOOP-6562. FileContextSymlinkBaseTest should use FileContextTestHelper.
+    (eli)
 
     HADOOP-7028. ant eclipse does not include requisite ant.jar in the
     classpath. (Patrick Angeles via eli)
 
+    HADOOP-6298. Add copyBytes to Text and BytesWritable. (omalley)
+
   OPTIMIZATIONS
 
     HADOOP-6884. Add LOG.isDebugEnabled() guard for each LOG.debug(..).

BytesWritable.java

@@ -22,8 +22,6 @@
 import java.io.DataInput;
 import java.io.DataOutput;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
@@ -37,7 +35,6 @@
 @InterfaceStability.Stable
 public class BytesWritable extends BinaryComparable
     implements WritableComparable<BinaryComparable> {
-  private static final Log LOG = LogFactory.getLog(BytesWritable.class);
   private static final int LENGTH_BYTES = 4;
   private static final byte[] EMPTY_BYTES = {};
@@ -59,7 +56,18 @@ public BytesWritable(byte[] bytes) {
   }
 
   /**
-   * Get the data from the BytesWritable.
+   * Get a copy of the bytes that is exactly the length of the data.
+   * See {@link #getBytes()} for faster access to the underlying array.
+   */
+  public byte[] copyBytes() {
+    byte[] result = new byte[size];
+    System.arraycopy(bytes, 0, result, 0, size);
+    return result;
+  }
+
+  /**
+   * Get the data backing the BytesWritable. Please use {@link #copyBytes()}
+   * if you need the returned array to be precisely the length of the data.
   * @return The data is only valid between 0 and getLength() - 1.
   */
  public byte[] getBytes() {

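For reference, the contract this hunk introduces: getBytes() still returns the (possibly over-allocated) backing array, while copyBytes() allocates a fresh array of exactly getLength() bytes. A minimal usage sketch under that contract; the demo class name and literal values are illustrative, not part of the commit:

import org.apache.hadoop.io.BytesWritable;

public class CopyBytesDemo {
  public static void main(String[] args) {
    BytesWritable w = new BytesWritable(new byte[] {1, 2, 3});
    w.setSize(2);                   // logical length shrinks to 2, but the
                                    // backing array keeps its 3 slots

    byte[] raw = w.getBytes();      // backing array: raw.length == 3 here
    byte[] exact = w.copyBytes();   // exact-length copy: exact.length == 2
    System.out.println(raw.length + " vs " + exact.length);
  }
}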
Text.java

@@ -35,8 +35,6 @@
 import org.apache.avro.reflect.Stringable;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
@@ -54,7 +52,6 @@
 @InterfaceStability.Stable
 public class Text extends BinaryComparable
     implements WritableComparable<BinaryComparable> {
-  private static final Log LOG= LogFactory.getLog(Text.class);
   private static ThreadLocal<CharsetEncoder> ENCODER_FACTORY =
     new ThreadLocal<CharsetEncoder>() {
@@ -100,9 +97,20 @@ public Text(byte[] utf8) {
     set(utf8);
   }
 
+  /**
+   * Get a copy of the bytes that is exactly the length of the data.
+   * See {@link #getBytes()} for faster access to the underlying array.
+   */
+  public byte[] copyBytes() {
+    byte[] result = new byte[length];
+    System.arraycopy(bytes, 0, result, 0, length);
+    return result;
+  }
+
   /**
    * Returns the raw bytes; however, only data up to {@link #getLength()} is
-   * valid.
+   * valid. Please use {@link #copyBytes()} if you
+   * need the returned array to be precisely the length of the data.
    */
   public byte[] getBytes() {
     return bytes;

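The same getBytes()/copyBytes() split, sketched on Text. The demo class name is illustrative, and how far the internal buffer over-allocates is an implementation detail (see the capacity assertions in the tests below):

import org.apache.hadoop.io.Text;

public class TextCopyBytesDemo {
  public static void main(String[] args) {
    Text t = new Text("abc");
    // Appends grow the internal buffer, which may over-allocate.
    t.append(new byte[] {'d', 'e', 'f', 'g'}, 0, 4);

    byte[] raw = t.getBytes();      // backing buffer: length >= t.getLength()
    byte[] exact = t.copyBytes();   // always exactly t.getLength() == 7 bytes
    System.out.println(raw.length + " vs " + exact.length);
  }
}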
TestBytesWritable.java

@@ -40,6 +40,8 @@ public void testSizeChange() throws Exception {
     for(int i=0; i < size*2; ++i) {
       assertEquals(hadoop[i%size], buf.getBytes()[i]);
     }
+    // ensure that copyBytes is exactly the right length
+    assertEquals(size*4, buf.copyBytes().length);
     // shrink the buffer
     buf.setCapacity(1);
     // make sure the size has been cut down too

TestText.java

@@ -215,6 +215,10 @@ public void testTextText() throws CharacterCodingException {
     a.append("xdefgxxx".getBytes(), 1, 4);
     assertEquals("modified aliased string", "abc", b.toString());
     assertEquals("appended string incorrectly", "abcdefg", a.toString());
+    // add an extra byte so that capacity = 14 and length = 8
+    a.append(new byte[]{'d'}, 0, 1);
+    assertEquals(14, a.getBytes().length);
+    assertEquals(8, a.copyBytes().length);
   }
 
   private class ConcurrentEncodeDecodeThread extends Thread {