From 605ed85c291a6250b077da32a49dbb35f3b78bf7 Mon Sep 17 00:00:00 2001 From: Stephen O'Donnell Date: Mon, 26 Apr 2021 11:00:23 +0100 Subject: [PATCH] HDFS-15621. Datanode DirectoryScanner uses excessive memory (#2849). Contributed by Stephen O'Donnell --- .../server/datanode/DirectoryScanner.java | 2 +- .../datanode/fsdataset/FsVolumeSpi.java | 118 ++++++++---------- .../datanode/fsdataset/impl/FsVolumeImpl.java | 5 +- .../server/datanode/TestDirectoryScanner.java | 37 +++--- .../fsdataset/impl/TestFsDatasetImpl.java | 4 +- 5 files changed, 77 insertions(+), 89 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java index 63865f69f2..a3bceec9a9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java @@ -582,7 +582,7 @@ public class DirectoryScanner implements Runnable { long blockId, FsVolumeSpi vol) { statsRecord.missingBlockFile++; statsRecord.missingMetaFile++; - diffRecord.add(new ScanInfo(blockId, null, null, vol)); + diffRecord.add(new ScanInfo(blockId, null, null, null, vol)); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java index 68d1a15d5c..8ae204364f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java @@ -227,27 +227,27 @@ public interface FsVolumeSpi */ public static class ScanInfo implements Comparable { private final long blockId; - /** - * The block file path, relative to the volume's base directory. - * If there was no block file found, this may be null. If 'vol' - * is null, then this is the full path of the block file. + * The full path to the folder containing the block / meta files. */ - private final String blockSuffix; - + private final File basePath; /** - * The suffix of the meta file path relative to the block file. - * If blockSuffix is null, then this will be the entire path relative - * to the volume base directory, or an absolute path if vol is also - * null. + * The block file name, with no path */ - private final String metaSuffix; + private final String blockFile; + /** + * Holds the meta file name, with no path, only if blockFile is null. + * If blockFile is not null, the meta file will be named identically to + * the blockFile, but with a suffix like "_1234.meta". If the blockFile + * is present, we store only the meta file suffix. + */ + private final String metaFile; private final FsVolumeSpi volume; private final FileRegion fileRegion; /** - * Get the file's length in async block scan + * Get the file's length in async block scan. */ private final long blockLength; @@ -257,35 +257,19 @@ public interface FsVolumeSpi private final static String QUOTED_FILE_SEPARATOR = Matcher.quoteReplacement(File.separator); - /** - * Get the most condensed version of the path. - * - * For example, the condensed version of /foo//bar is /foo/bar - * Unlike {@link File#getCanonicalPath()}, this will never perform I/O - * on the filesystem. - * - * @param path the path to condense - * @return the condensed path - */ - private static String getCondensedPath(String path) { - return CONDENSED_PATH_REGEX.matcher(path). - replaceAll(QUOTED_FILE_SEPARATOR); - } - /** * Get a path suffix. * - * @param f The file to get the suffix for. + * @param f The string to get the suffix for. * @param prefix The prefix we're stripping off. * - * @return A suffix such that prefix + suffix = path to f + * @return A suffix such that prefix + suffix = f */ - private static String getSuffix(File f, String prefix) { - String fullPath = getCondensedPath(f.getAbsolutePath()); - if (fullPath.startsWith(prefix)) { - return fullPath.substring(prefix.length()); + private static String getSuffix(String f, String prefix) { + if (f.startsWith(prefix)) { + return f.substring(prefix.length()); } - throw new RuntimeException(prefix + " is not a prefix of " + fullPath); + throw new RuntimeException(prefix + " is not a prefix of " + f); } /** @@ -293,27 +277,27 @@ public interface FsVolumeSpi * the block data and meta-data files. * * @param blockId the block ID - * @param blockFile the path to the block data file - * @param metaFile the path to the block meta-data file + * @param basePath The full path to the directory the block is stored in + * @param blockFile The block filename, with no path + * @param metaFile The meta filename, with no path. If blockFile is not null + * then the metaFile and blockFile should have the same + * prefix, with the meta file having a suffix like + * "_1234.meta". To save memory, if the blockFile is present + * we store only the meta file suffix in the object * @param vol the volume that contains the block */ - public ScanInfo(long blockId, File blockFile, File metaFile, - FsVolumeSpi vol) { + public ScanInfo(long blockId, File basePath, String blockFile, + String metaFile, FsVolumeSpi vol) { this.blockId = blockId; - String condensedVolPath = - (vol == null || vol.getBaseURI() == null) ? null : - getCondensedPath(new File(vol.getBaseURI()).getAbsolutePath()); - this.blockSuffix = blockFile == null ? null : - getSuffix(blockFile, condensedVolPath); - this.blockLength = (blockFile != null) ? blockFile.length() : 0; - if (metaFile == null) { - this.metaSuffix = null; - } else if (blockFile == null) { - this.metaSuffix = getSuffix(metaFile, condensedVolPath); + this.basePath = basePath; + this.blockFile = blockFile; + if (blockFile != null && metaFile != null) { + this.metaFile = getSuffix(metaFile, blockFile); } else { - this.metaSuffix = getSuffix(metaFile, - condensedVolPath + blockSuffix); + this.metaFile = metaFile; } + this.blockLength = (blockFile != null) ? + new File(basePath, blockFile).length() : 0; this.volume = vol; this.fileRegion = null; } @@ -333,8 +317,9 @@ public interface FsVolumeSpi this.blockLength = length; this.volume = vol; this.fileRegion = fileRegion; - this.blockSuffix = null; - this.metaSuffix = null; + this.basePath = null; + this.blockFile = null; + this.metaFile = null; } /** @@ -343,8 +328,8 @@ public interface FsVolumeSpi * @return the block data file */ public File getBlockFile() { - return (blockSuffix == null) ? null : - new File(new File(volume.getBaseURI()).getAbsolutePath(), blockSuffix); + return (blockFile == null) ? null : + new File(basePath.getAbsolutePath(), blockFile); } /** @@ -363,15 +348,10 @@ public interface FsVolumeSpi * @return the block meta data file */ public File getMetaFile() { - if (metaSuffix == null) { + if (metaFile == null) { return null; } - String fileSuffix = metaSuffix; - if (blockSuffix != null) { - fileSuffix = blockSuffix + metaSuffix; - } - return new File(new File(volume.getBaseURI()).getAbsolutePath(), - fileSuffix); + return new File(basePath.getAbsolutePath(), fullMetaFile()); } /** @@ -414,14 +394,24 @@ public interface FsVolumeSpi } public long getGenStamp() { - return metaSuffix != null ? Block.getGenerationStamp( - getMetaFile().getName()) : - HdfsConstants.GRANDFATHER_GENERATION_STAMP; + return metaFile != null ? Block.getGenerationStamp(fullMetaFile()) + : HdfsConstants.GRANDFATHER_GENERATION_STAMP; } public FileRegion getFileRegion() { return fileRegion; } + + private String fullMetaFile() { + if (metaFile == null) { + return null; + } + if (blockFile == null) { + return metaFile; + } else { + return blockFile + metaFile; + } + } } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java index 6681f6fd64..1dda8584f5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java @@ -1451,7 +1451,7 @@ public class FsVolumeImpl implements FsVolumeSpi { long blockId = Block.getBlockId(file.getName()); verifyFileLocation(file, bpFinalizedDir, blockId); - report.add(new ScanInfo(blockId, null, file, this)); + report.add(new ScanInfo(blockId, dir, null, fileNames.get(i), this)); } continue; } @@ -1474,7 +1474,8 @@ public class FsVolumeImpl implements FsVolumeSpi { } } verifyFileLocation(blockFile, bpFinalizedDir, blockId); - report.add(new ScanInfo(blockId, blockFile, metaFile, this)); + report.add(new ScanInfo(blockId, dir, blockFile.getName(), + metaFile == null ? null : metaFile.getName(), this)); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java index 44d99a292b..e2a15a8da0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java @@ -1040,19 +1040,21 @@ public class TestDirectoryScanner { private final static String BPID_2 = "BP-367845636-127.0.0.1-5895645674231"; - void testScanInfoObject(long blockId, File blockFile, File metaFile) + void testScanInfoObject(long blockId, File baseDir, String blockFile, + String metaFile) throws Exception { FsVolumeSpi.ScanInfo scanInfo = - new FsVolumeSpi.ScanInfo(blockId, blockFile, metaFile, TEST_VOLUME); + new FsVolumeSpi.ScanInfo(blockId, baseDir, blockFile, metaFile, + TEST_VOLUME); assertEquals(blockId, scanInfo.getBlockId()); if (blockFile != null) { - assertEquals(blockFile.getAbsolutePath(), + assertEquals(new File(baseDir, blockFile).getAbsolutePath(), scanInfo.getBlockFile().getAbsolutePath()); } else { assertNull(scanInfo.getBlockFile()); } if (metaFile != null) { - assertEquals(metaFile.getAbsolutePath(), + assertEquals(new File(baseDir, metaFile).getAbsolutePath(), scanInfo.getMetaFile().getAbsolutePath()); } else { assertNull(scanInfo.getMetaFile()); @@ -1062,7 +1064,7 @@ public class TestDirectoryScanner { void testScanInfoObject(long blockId) throws Exception { FsVolumeSpi.ScanInfo scanInfo = - new FsVolumeSpi.ScanInfo(blockId, null, null, null); + new FsVolumeSpi.ScanInfo(blockId, null, null, null, null); assertEquals(blockId, scanInfo.getBlockId()); assertNull(scanInfo.getBlockFile()); assertNull(scanInfo.getMetaFile()); @@ -1071,24 +1073,19 @@ public class TestDirectoryScanner { @Test(timeout = 120000) public void TestScanInfo() throws Exception { testScanInfoObject(123, - new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(), - "blk_123"), - new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(), - "blk_123__1001.meta")); + new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath()), + "blk_123", "blk_123__1001.meta"); testScanInfoObject(464, - new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(), - "blk_123"), - null); - testScanInfoObject(523, null, - new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(), - "blk_123__1009.meta")); - testScanInfoObject(789, null, null); + new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath()), + "blk_123", null); + testScanInfoObject(523, + new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath()), + null, "blk_123__1009.meta"); + testScanInfoObject(789, null, null, null); testScanInfoObject(456); testScanInfoObject(123, - new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(), - "blk_567"), - new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(), - "blk_567__1004.meta")); + new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath()), + "blk_567", "blk_567__1004.meta"); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java index 778ef97180..fbd9f005e3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java @@ -1786,8 +1786,8 @@ public class TestFsDatasetImpl { assertFalse(metaFile.exists()); FsVolumeSpi.ScanInfo info = new FsVolumeSpi.ScanInfo( - replicaInfo.getBlockId(), blockFile.getAbsoluteFile(), - metaFile.getAbsoluteFile(), replicaInfo.getVolume()); + replicaInfo.getBlockId(), blockFile.getParentFile().getAbsoluteFile(), + blockFile.getName(), metaFile.getName(), replicaInfo.getVolume()); fsdataset.checkAndUpdate(bpid, info); BlockManager blockManager = cluster.getNameNode().