From e93ebeae51a7c4dc0840e2a73b3ac09a033a0c29 Mon Sep 17 00:00:00 2001
From: Christopher Douglas
Date: Mon, 29 Jun 2009 07:16:56 +0000
Subject: [PATCH] HADOOP-6109. Change Text to grow its internal buffer
 exponentially, rather than the max of the current length and the proposed
 length to improve performance reading large values. Contributed by thushara
 wijeratna

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@789242 13f79535-47bb-0310-9956-ffa450edef68
---
 CHANGES.txt                             | 4 ++++
 src/java/org/apache/hadoop/io/Text.java | 7 ++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 13c0eadfde..b90bd7fb4e 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -464,6 +464,10 @@ Trunk (unreleased changes)
 
     HADOOP-5925. EC2 scripts should exit on error. (tomwhite)
 
+    HADOOP-6109. Change Text to grow its internal buffer exponentially, rather
+    than the max of the current length and the proposed length to improve
+    performance reading large values. (thushara wijeratna via cdouglas)
+
   OPTIMIZATIONS
 
     HADOOP-5595. NameNode does not need to run a replicator to choose a
diff --git a/src/java/org/apache/hadoop/io/Text.java b/src/java/org/apache/hadoop/io/Text.java
index 19faa8768d..2110a34f49 100644
--- a/src/java/org/apache/hadoop/io/Text.java
+++ b/src/java/org/apache/hadoop/io/Text.java
@@ -31,6 +31,7 @@
 import java.nio.charset.MalformedInputException;
 import java.text.CharacterIterator;
 import java.text.StringCharacterIterator;
+import java.util.Arrays;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -237,11 +238,11 @@ public void clear() {
    */
   private void setCapacity(int len, boolean keepData) {
     if (bytes == null || bytes.length < len) {
-      byte[] newBytes = new byte[len];
       if (bytes != null && keepData) {
-        System.arraycopy(bytes, 0, newBytes, 0, length);
+        bytes = Arrays.copyOf(bytes, Math.max(len,length << 1));
+      } else {
+        bytes = new byte[len];
       }
-      bytes = newBytes;
     }
   }
 