From 7101477e4726a70ab0eab57c2d4480a04446a8dc Mon Sep 17 00:00:00 2001 From: Andrew Wang Date: Thu, 1 Jun 2017 15:20:18 -0700 Subject: [PATCH] HDFS-11383. Intern strings in BlockLocation and ExtendedBlock. Contributed by Misha Dmitriev. --- .../org/apache/hadoop/fs/BlockLocation.java | 21 ++++++----- .../apache/hadoop/util/StringInterner.java | 37 ++++++++++--------- .../hadoop/hdfs/protocol/ExtendedBlock.java | 16 +++++--- 3 files changed, 40 insertions(+), 34 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java index b8cad3afe1..591febf0fd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java @@ -22,6 +22,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.util.StringInterner; /** * Represents the network location of a block, information about the hosts @@ -114,27 +115,27 @@ public BlockLocation(String[] names, String[] hosts, String[] cachedHosts, if (names == null) { this.names = EMPTY_STR_ARRAY; } else { - this.names = names; + this.names = StringInterner.internStringsInArray(names); } if (hosts == null) { this.hosts = EMPTY_STR_ARRAY; } else { - this.hosts = hosts; + this.hosts = StringInterner.internStringsInArray(hosts); } if (cachedHosts == null) { this.cachedHosts = EMPTY_STR_ARRAY; } else { - this.cachedHosts = cachedHosts; + this.cachedHosts = StringInterner.internStringsInArray(cachedHosts); } if (topologyPaths == null) { this.topologyPaths = EMPTY_STR_ARRAY; } else { - this.topologyPaths = topologyPaths; + this.topologyPaths = StringInterner.internStringsInArray(topologyPaths); } if (storageIds == null) { this.storageIds = EMPTY_STR_ARRAY; } else { - this.storageIds = storageIds; + this.storageIds = StringInterner.internStringsInArray(storageIds); } if (storageTypes == null) { this.storageTypes = EMPTY_STORAGE_TYPE_ARRAY; @@ -238,7 +239,7 @@ public void setHosts(String[] hosts) throws IOException { if (hosts == null) { this.hosts = EMPTY_STR_ARRAY; } else { - this.hosts = hosts; + this.hosts = StringInterner.internStringsInArray(hosts); } } @@ -249,7 +250,7 @@ public void setCachedHosts(String[] cachedHosts) { if (cachedHosts == null) { this.cachedHosts = EMPTY_STR_ARRAY; } else { - this.cachedHosts = cachedHosts; + this.cachedHosts = StringInterner.internStringsInArray(cachedHosts); } } @@ -260,7 +261,7 @@ public void setNames(String[] names) throws IOException { if (names == null) { this.names = EMPTY_STR_ARRAY; } else { - this.names = names; + this.names = StringInterner.internStringsInArray(names); } } @@ -271,7 +272,7 @@ public void setTopologyPaths(String[] topologyPaths) throws IOException { if (topologyPaths == null) { this.topologyPaths = EMPTY_STR_ARRAY; } else { - this.topologyPaths = topologyPaths; + this.topologyPaths = StringInterner.internStringsInArray(topologyPaths); } } @@ -279,7 +280,7 @@ public void setStorageIds(String[] storageIds) { if (storageIds == null) { this.storageIds = EMPTY_STR_ARRAY; } else { - this.storageIds = storageIds; + this.storageIds = StringInterner.internStringsInArray(storageIds); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringInterner.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringInterner.java index d74f8106a8..028e49acbe 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringInterner.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringInterner.java @@ -25,8 +25,9 @@ import com.google.common.collect.Interners; /** - * Provides equivalent behavior to String.intern() to optimize performance, - * whereby does not consume memory in the permanent generation. + * Provides string interning utility methods. For weak interning, + * we use the standard String.intern() call, that performs very well + * (no problems with PermGen overflowing, etc.) starting from JDK 7. */ @InterfaceAudience.Public @InterfaceStability.Stable @@ -35,20 +36,9 @@ public class StringInterner { /** * Retains a strong reference to each string instance it has interned. */ - private final static Interner strongInterner; - - /** - * Retains a weak reference to each string instance it has interned. - */ - private final static Interner weakInterner; - - - - static { - strongInterner = Interners.newStrongInterner(); - weakInterner = Interners.newWeakInterner(); - } - + private final static Interner STRONG_INTERNER = + Interners.newStrongInterner(); + /** * Interns and returns a reference to the representative instance * for any of a collection of string instances that are equal to each other. @@ -62,7 +52,7 @@ public static String strongIntern(String sample) { if (sample == null) { return null; } - return strongInterner.intern(sample); + return STRONG_INTERNER.intern(sample); } /** @@ -78,7 +68,18 @@ public static String weakIntern(String sample) { if (sample == null) { return null; } - return weakInterner.intern(sample); + return sample.intern(); + } + + /** + * Interns all the strings in the given array in place, + * returning the same array. + */ + public static String[] internStringsInArray(String[] strings) { + for (int i = 0; i < strings.length; i++) { + strings[i] = weakIntern(strings[i]); + } + return strings; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ExtendedBlock.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ExtendedBlock.java index af24909f01..7939662ee3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ExtendedBlock.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ExtendedBlock.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdfs.protocol; +import org.apache.commons.lang.builder.HashCodeBuilder; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -42,13 +43,13 @@ public ExtendedBlock(final String poolId, final long blockId) { } public ExtendedBlock(String poolId, Block b) { - this.poolId = poolId; + this.poolId = poolId != null ? poolId.intern() : null; this.block = b; } public ExtendedBlock(final String poolId, final long blkid, final long len, final long genstamp) { - this.poolId = poolId; + this.poolId = poolId != null ? poolId.intern() : null; block = new Block(blkid, len, genstamp); } @@ -86,7 +87,7 @@ public void setNumBytes(final long len) { } public void set(String poolId, Block blk) { - this.poolId = poolId; + this.poolId = poolId != null ? poolId.intern() : null; this.block = blk; } @@ -107,13 +108,16 @@ public boolean equals(Object o) { return false; } ExtendedBlock b = (ExtendedBlock)o; - return b.block.equals(block) && b.poolId.equals(poolId); + return b.block.equals(block) && + (b.poolId != null ? b.poolId.equals(poolId) : poolId == null); } @Override // Object public int hashCode() { - int result = 31 + poolId.hashCode(); - return (31 * result + block.hashCode()); + return new HashCodeBuilder(31, 17). + append(poolId). + append(block). + toHashCode(); } @Override // Object