From 16c8dbde574f49827fde5ee9add1861ee65d4645 Mon Sep 17 00:00:00 2001 From: Wei-Chiu Chuang Date: Thu, 29 Jun 2017 06:38:41 -0700 Subject: [PATCH] HDFS-11881. NameNode consumes a lot of memory for snapshot diff report generation. Contributed by Manoj Govindassamy. --- .../hdfs/protocolPB/PBHelperClient.java | 5 +- .../namenode/snapshot/SnapshotDiffInfo.java | 5 +- .../hadoop/hdfs/TestSnapshotCommands.java | 50 +++++++++++++++++++ 3 files changed, 56 insertions(+), 4 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java index 63d0025bd6..feb3061625 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java @@ -186,6 +186,7 @@ import org.apache.hadoop.io.erasurecode.ECSchema; import org.apache.hadoop.security.proto.SecurityProtos.TokenProto; import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.util.ChunkedArrayList; import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.util.LimitInputStream; @@ -1455,7 +1456,7 @@ public static SnapshotDiffReport convert( String toSnapshot = reportProto.getToSnapshot(); List list = reportProto .getDiffReportEntriesList(); - List entries = new ArrayList<>(); + List entries = new ChunkedArrayList<>(); for (SnapshotDiffReportEntryProto entryProto : list) { DiffReportEntry entry = convert(entryProto); if (entry != null) @@ -2392,7 +2393,7 @@ public static SnapshotDiffReportProto convert(SnapshotDiffReport report) { return null; } List entries = report.getDiffList(); - List entryProtos = new ArrayList<>(); + List entryProtos = new ChunkedArrayList<>(); for (DiffReportEntry entry : entries) { SnapshotDiffReportEntryProto entryProto = convert(entry); 
if (entryProto != null) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotDiffInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotDiffInfo.java index a576c571d0..fcd80ae27a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotDiffInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotDiffInfo.java @@ -37,6 +37,7 @@ import com.google.common.base.Preconditions; import com.google.common.primitives.SignedBytes; +import org.apache.hadoop.util.ChunkedArrayList; /** * A class describing the difference between snapshots of a snapshottable @@ -186,7 +187,7 @@ boolean isFromEarlier() { * @return A {@link SnapshotDiffReport} describing the difference */ public SnapshotDiffReport generateReport() { - List diffReportList = new ArrayList(); + List diffReportList = new ChunkedArrayList<>(); for (Map.Entry drEntry : diffMap.entrySet()) { INode node = drEntry.getKey(); byte[][] path = drEntry.getValue(); @@ -213,7 +214,7 @@ public SnapshotDiffReport generateReport() { */ private List generateReport(ChildrenDiff dirDiff, byte[][] parentPath, boolean fromEarlier, Map renameMap) { - List list = new ArrayList(); + List list = new ChunkedArrayList<>(); List created = dirDiff.getList(ListType.CREATED); List deleted = dirDiff.getList(ListType.DELETED); byte[][] fullPath = new byte[parentPath.length + 1][]; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSnapshotCommands.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSnapshotCommands.java index 1d5e0711c4..7e1af2aa4d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSnapshotCommands.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSnapshotCommands.java 
@@ -25,6 +25,7 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
 import org.apache.hadoop.hdfs.tools.snapshot.SnapshotDiff;
+import org.apache.hadoop.util.ChunkedArrayList;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
@@ -195,4 +196,53 @@ public void testSnapshotCommandsWithURI()throws Exception {
         "Disallowing snaphot on " + path + " succeeded", config);
     fs.delete(new Path("/Fully/QPath"), true);
   }
+
+  @Test (timeout=60000)
+  public void testSnapshotDiff()throws Exception {
+    Configuration config = new HdfsConfiguration();
+    Path snapDirPath = new Path(fs.getUri().toString() + "/snap_dir");
+    String snapDir = snapDirPath.toString();
+    fs.mkdirs(snapDirPath);
+
+    DFSTestUtil.DFSAdminRun("-allowSnapshot " + snapDirPath, 0,
+        "Allowing snaphot on " + snapDirPath + " succeeded", config);
+    DFSTestUtil.createFile(fs, new Path(snapDirPath, "file1"),
+        1024, (short) 1, 100);
+    DFSTestUtil.FsShellRun("-createSnapshot " + snapDirPath + " sn1", config);
+    DFSTestUtil.createFile(fs, new Path(snapDirPath, "file2"),
+        1024, (short) 1, 100);
+    DFSTestUtil.createFile(fs, new Path(snapDirPath, "file3"),
+        1024, (short) 1, 100);
+    DFSTestUtil.FsShellRun("-createSnapshot " + snapDirPath + " sn2", config);
+
+    // Verify the snapshot diff via both the API and the command-line tool.
+    SnapshotDiffReport report_s1_s2 =
+        fs.getSnapshotDiffReport(snapDirPath, "sn1", "sn2");
+    DFSTestUtil.toolRun(new SnapshotDiff(config), snapDir +
+        " sn1 sn2", 0, report_s1_s2.toString());
+    DFSTestUtil.FsShellRun("-renameSnapshot " + snapDirPath + " sn2 sn3",
+        config);
+
+    SnapshotDiffReport report_s1_s3 =
+        fs.getSnapshotDiffReport(snapDirPath, "sn1", "sn3");
+    DFSTestUtil.toolRun(new SnapshotDiff(config), snapDir +
+        " sn1 sn3", 0, report_s1_s3.toString());
+
+    // Create 100 more files so the diff report is large enough to make the
+    // backing ChunkedArrayList allocate more than one chunk.
+    for (int i = 0; i < 100; i++) {
+      DFSTestUtil.createFile(fs, new Path(snapDirPath, "file_" + i),
+          1, (short) 1, 100);
+    }
+    DFSTestUtil.FsShellRun("-createSnapshot " + snapDirPath + " sn4", config);
+    DFSTestUtil.toolRun(new SnapshotDiff(config), snapDir +
+        " sn1 sn4", 0, null);
+
+    DFSTestUtil.FsShellRun("-deleteSnapshot " + snapDir + " sn1", config);
+    DFSTestUtil.FsShellRun("-deleteSnapshot " + snapDir + " sn3", config);
+    DFSTestUtil.FsShellRun("-deleteSnapshot " + snapDir + " sn4", config);
+    DFSTestUtil.DFSAdminRun("-disallowSnapshot " + snapDir, 0,
+        "Disallowing snaphot on " + snapDirPath + " succeeded", config);
+    fs.delete(snapDirPath, true);
+  }
 }