diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index b223373cfd..033543b2e3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -290,6 +290,9 @@ Release 2.1.0-beta - UNRELEASED IMPROVEMENTS + HDFS-4461. DirectoryScanner: volume path prefix takes up memory for every + block that is scanned (Colin Patrick McCabe) + HDFS-4222. NN is unresponsive and loses heartbeats from DNs when configured to use LDAP and LDAP has issues. (Xiaobo Peng, suresh) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java index 5d870d771e..92f5d63dc2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java @@ -33,6 +33,8 @@ import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -154,30 +156,109 @@ public Map toSortedArrays() { * Tracks the files and other information related to a block on the disk * Missing file is indicated by setting the corresponding member * to null. + * + * Because millions of these structures may be created, we try to save + * memory here. So instead of storing full paths, we store path suffixes. + * The block file, if it exists, will have a path like this: + * <volume_base_path>/<block_path> + * So we don't need to store the volume path, since we already know what the + * volume is. 
+ * + * The metadata file, if it exists, will have a path like this: + * <volume_base_path>/<block_path>_<genstamp>.meta + * So if we have a block file, there isn't any need to store the block path + * again. + * + * The accessor functions take care of these manipulations. */ static class ScanInfo implements Comparable { private final long blockId; - private final File metaFile; - private final File blockFile; + + /** + * The block file path, relative to the volume's base directory. + * If there was no block file found, this may be null. If 'vol' + * is null, then this is the full path of the block file. + */ + private final String blockSuffix; + + /** + * The suffix of the meta file path relative to the block file. + * If blockSuffix is null, then this will be the entire path relative + * to the volume base directory, or an absolute path if vol is also + * null. + */ + private final String metaSuffix; + private final FsVolumeSpi volume; + private final static Pattern CONDENSED_PATH_REGEX = + Pattern.compile("(?