diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index b836ff8215..17cabadba2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -357,6 +357,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final boolean DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES_DEFAULT =
       HdfsClientConfigKeys.DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES_DEFAULT;
 
+  public static final String DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE = "dfs.namenode.snapshot.skip.capture.accesstime-only-change";
+  public static final boolean DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE_DEFAULT = false;
+
   // Whether to enable datanode's stale state detection and usage for reads
   public static final String DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_KEY = "dfs.namenode.avoid.read.stale.datanode";
   public static final boolean DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_DEFAULT = false;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAttrOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAttrOp.java
index d4b24f5971..0dfaa8e7d0 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAttrOp.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAttrOp.java
@@ -494,7 +494,9 @@ static boolean unprotectedSetTimes(
     // then no need to store access time
     if (atime != -1 && (status || force
         || atime > inode.getAccessTime() + fsd.getAccessTimePrecision())) {
-      inode.setAccessTime(atime, latest);
+      inode.setAccessTime(atime, latest,
+          fsd.getFSNamesystem().getSnapshotManager().
+          getSkipCaptureAccessTimeOnlyChange());
       status = true;
     }
     return status;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
index 060bd59793..bc62a7ef66 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
@@ -413,7 +413,7 @@ private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir,
       // update the block list.
 
       // Update the salient file attributes.
-      newFile.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID);
+      newFile.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID, false);
       newFile.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID);
       ErasureCodingPolicy ecPolicy =
           FSDirErasureCodingOp.unprotectedGetErasureCodingPolicy(
@@ -436,7 +436,7 @@ private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir,
       final INodeFile file = INodeFile.valueOf(iip.getLastINode(), path);
 
       // Update the salient file attributes.
-      file.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID);
+      file.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID, false);
       file.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID);
       ErasureCodingPolicy ecPolicy =
           FSDirErasureCodingOp.unprotectedGetErasureCodingPolicy(
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java
index d768e08558..874563db9f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java
@@ -724,8 +724,11 @@ public final long getAccessTime() {
   /**
    * Set last access time of inode.
    */
-  public final INode setAccessTime(long accessTime, int latestSnapshotId) {
-    recordModification(latestSnapshotId);
+  public final INode setAccessTime(long accessTime, int latestSnapshotId,
+      boolean skipCaptureAccessTimeOnlyChangeInSnapshot) {
+    if (!skipCaptureAccessTimeOnlyChangeInSnapshot) {
+      recordModification(latestSnapshotId);
+    }
     setAccessTime(accessTime);
     return this;
   }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java
index 4b479e04d8..2fef2358c8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java
@@ -19,6 +19,8 @@
 
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE_DEFAULT;
 
 import java.io.DataInput;
 import java.io.DataOutput;
@@ -33,6 +35,8 @@
 
 import javax.management.ObjectName;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.DFSUtilClient;
@@ -67,8 +71,19 @@
  * if necessary.
  */
 public class SnapshotManager implements SnapshotStatsMXBean {
+  public static final Log LOG = LogFactory.getLog(SnapshotManager.class);
+
   private final FSDirectory fsdir;
   private final boolean captureOpenFiles;
+  /**
+   * If skipCaptureAccessTimeOnlyChange is set to true and the access time
+   * of a file changed but no other modification was made to the file, the
+   * access time change will not be captured in the next snapshot. However,
+   * if some other modification was made to the file, the last access time
+   * will be captured together with that modification in the next snapshot.
+   */
+  private boolean skipCaptureAccessTimeOnlyChange = false;
+
   private final AtomicInteger numSnapshots = new AtomicInteger();
 
   private static final int SNAPSHOT_ID_BIT_WIDTH = 24;
@@ -84,6 +99,19 @@ public SnapshotManager(final Configuration conf, final FSDirectory fsdir) {
     this.captureOpenFiles = conf.getBoolean(
         DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES,
         DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES_DEFAULT);
+    this.skipCaptureAccessTimeOnlyChange = conf.getBoolean(
+        DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE,
+        DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE_DEFAULT);
+    LOG.info("Loaded config captureOpenFiles: " + captureOpenFiles
+        + ", skipCaptureAccessTimeOnlyChange: "
+        + skipCaptureAccessTimeOnlyChange);
+  }
+
+  /**
+   * @return skipCaptureAccessTimeOnlyChange
+   */
+  public boolean getSkipCaptureAccessTimeOnlyChange() {
+    return skipCaptureAccessTimeOnlyChange;
   }
 
   /** Used in tests only */
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index 2a42b5ce0f..bbe2eca00b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -4259,22 +4259,34 @@
   </description>
 </property>
 
-  <property>
-    <name>dfs.namenode.snapshot.capture.openfiles</name>
-    <value>false</value>
-    <description>
-      If true, snapshots taken will have an immutable shared copy of
-      the open files that have valid leases. Even after the open files
-      grow or shrink in size, snapshot will always have the previous
-      point-in-time version of the open files, just like all other
-      closed files. Default is false.
-      Note: The file length captured for open files in snapshot is
-      whats recorded in NameNode at the time of snapshot and it may
-      be shorter than what the client has written till then. In order
-      to capture the latest length, the client can call hflush/hsync
-      with the flag SyncFlag.UPDATE_LENGTH on the open files handles.
-    </description>
-  </property>
+<property>
+  <name>dfs.namenode.snapshot.capture.openfiles</name>
+  <value>false</value>
+  <description>
+    If true, snapshots taken will have an immutable shared copy of
+    the open files that have valid leases. Even after the open files
+    grow or shrink in size, snapshot will always have the previous
+    point-in-time version of the open files, just like all other
+    closed files. Default is false.
+    Note: The file length captured for open files in snapshot is
+    whats recorded in NameNode at the time of snapshot and it may
+    be shorter than what the client has written till then. In order
+    to capture the latest length, the client can call hflush/hsync
+    with the flag SyncFlag.UPDATE_LENGTH on the open files handles.
+  </description>
+</property>
+
+<property>
+  <name>dfs.namenode.snapshot.skip.capture.accesstime-only-change</name>
+  <value>false</value>
+  <description>
+    If set to true and the access time of a file/directory changed but no
+    other modification was made to the file/directory, the changed access
+    time will not be captured in the next snapshot. However, if some other
+    modification was made to the file/directory, the latest access time will
+    be captured together with that modification in the next snapshot.
+  </description>
+</property>
+
 <property>
   <name>dfs.pipeline.ecn</name>
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirAttrOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirAttrOp.java
index 44cf57a33b..bdd48e873c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirAttrOp.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirAttrOp.java
@@ -21,6 +21,8 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager;
 import org.junit.Test;
 import org.mockito.Mockito;
 
@@ -36,10 +38,15 @@ public class TestFSDirAttrOp {
   private boolean unprotectedSetTimes(long atime, long atime0, long precision,
                                       long mtime, boolean force)
       throws QuotaExceededException {
+    FSNamesystem fsn = Mockito.mock(FSNamesystem.class);
+    SnapshotManager ssMgr = Mockito.mock(SnapshotManager.class);
     FSDirectory fsd = Mockito.mock(FSDirectory.class);
     INodesInPath iip = Mockito.mock(INodesInPath.class);
     INode inode = Mockito.mock(INode.class);
 
+    when(fsd.getFSNamesystem()).thenReturn(fsn);
+    when(fsn.getSnapshotManager()).thenReturn(ssMgr);
+    when(ssMgr.getSkipCaptureAccessTimeOnlyChange()).thenReturn(false);
     when(fsd.getAccessTimePrecision()).thenReturn(precision);
     when(fsd.hasWriteLock()).thenReturn(Boolean.TRUE);
     when(iip.getLastINode()).thenReturn(inode);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDiffReport.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDiffReport.java
index b9451bc05d..4654486f35 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDiffReport.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDiffReport.java
@@ -18,10 +18,13 @@
 package org.apache.hadoop.hdfs.server.namenode.snapshot;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
 import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
 import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.Random;
@@ -43,15 +46,21 @@
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.util.Time;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Tests snapshot deletion.
  */
 public class TestSnapshotDiffReport {
+  private static final Logger LOG =
+      LoggerFactory.getLogger(TestSnapshotDiffReport.class);
+
   private static final long SEED = 0;
   private static final short REPLICATION = 3;
   private static final short REPLICATION_1 = 2;
@@ -73,6 +82,10 @@ public void setUp() throws Exception {
     conf = new Configuration();
     conf.setBoolean(
         DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES, true);
+    conf.setLong(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 1);
+    conf.setBoolean(
+        DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE,
+        true);
     cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION)
         .format(true).build();
     cluster.waitActive();
@@ -167,8 +180,8 @@ private void verifyDiffReport(Path dir, String from, String to,
     // reverse the order of from and to
     SnapshotDiffReport inverseReport = hdfs
         .getSnapshotDiffReport(dir, to, from);
-    System.out.println(report.toString());
-    System.out.println(inverseReport.toString() + "\n");
+    LOG.info(report.toString());
+    LOG.info(inverseReport.toString() + "\n");
     assertEquals(entries.length, report.getDiffList().size());
     assertEquals(entries.length, inverseReport.getDiffList().size());
 
@@ -221,20 +234,20 @@ public void testDiffReport() throws Exception {
     // diff between the same snapshot
     SnapshotDiffReport report = hdfs.getSnapshotDiffReport(sub1, "s0", "s0");
-    System.out.println(report);
+    LOG.info(report.toString());
     assertEquals(0, report.getDiffList().size());
 
     report = hdfs.getSnapshotDiffReport(sub1, "", "");
-    System.out.println(report);
+    LOG.info(report.toString());
     assertEquals(0, report.getDiffList().size());
 
     report = hdfs.getSnapshotDiffReport(subsubsub1, "s0", "s2");
-    System.out.println(report);
+    LOG.info(report.toString());
     assertEquals(0, report.getDiffList().size());
 
     // test path with scheme also works
     report = hdfs.getSnapshotDiffReport(hdfs.makeQualified(subsubsub1),
         "s0", "s2");
-    System.out.println(report);
+    LOG.info(report.toString());
     assertEquals(0, report.getDiffList().size());
 
     verifyDiffReport(sub1, "s0", "s2",
@@ -677,4 +690,142 @@ public void testDiffReportWithOpenFiles() throws Exception {
   }
 
+  private long getAccessTime(Path path) throws IOException {
+    return hdfs.getFileStatus(path).getAccessTime();
+  }
+
+  private String getAccessTimeStr(Path path) throws IOException {
+    SimpleDateFormat timeFmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    return timeFmt.format(new Date(getAccessTime(path)));
+  }
+
+  private Path getSSpath(Path path, Path ssRoot, String ssName) {
+    return new Path(ssRoot, ".snapshot/" + ssName + "/"
+        + path.toString().substring(ssRoot.toString().length()));
+  }
+
+  private void printAtime(Path path, Path ssRoot, String ssName)
+      throws IOException {
+    Path ssPath = getSSpath(path, ssRoot, ssName);
+    LOG.info("Access time "
+        + path + ": " + getAccessTimeStr(path)
+        + " " + ssPath + ": " + getAccessTimeStr(ssPath));
+  }
+
+  private void assertAtimeEquals(Path path, Path ssRoot,
+      String ssName1, String ssName2)
+      throws IOException {
+    Path ssPath1 = getSSpath(path, ssRoot, ssName1);
+    Path ssPath2 = getSSpath(path, ssRoot, ssName2);
+    assertEquals(getAccessTime(ssPath1), getAccessTime(ssPath2));
+  }
+
+  private void assertAtimeNotEquals(Path path, Path ssRoot,
+      String ssName1, String ssName2)
+      throws IOException {
+    Path ssPath1 = getSSpath(path, ssRoot, ssName1);
+    Path ssPath2 = getSSpath(path, ssRoot, ssName2);
+    assertNotEquals(getAccessTime(ssPath1), getAccessTime(ssPath2));
+  }
+
+  /**
+   * Check to see access time is not captured in snapshot when applicable.
+   * When DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE
+   * is set to true, and if a file's access time changed between two
+   * snapshots but has no other modification, then the access time is not
+   * captured in snapshot.
+   */
+  @Test
+  public void testDontCaptureAccessTimeOnlyChangeReport() throws Exception {
+    final Path froot = new Path("/");
+    final Path root = new Path(froot, "/testSdiffCalc");
+
+    // items created pre enabling snapshot
+    final Path filePreSS = new Path(root, "fParent/filePreSS");
+    final Path dirPreSS = new Path(root, "dirPreSS");
+    final Path dirPreSSChild = new Path(dirPreSS, "dirPreSSChild");
+
+    // items created after enabling snapshot
+    final Path filePostSS = new Path(root, "fParent/filePostSS");
+    final Path dirPostSS = new Path(root, "dirPostSS");
+    final Path dirPostSSChild = new Path(dirPostSS, "dirPostSSChild");
+
+    DFSTestUtil.createFile(hdfs, filePreSS, BLOCKSIZE, REPLICATION, SEED);
+    DFSTestUtil.createFile(hdfs, dirPreSSChild, BLOCKSIZE, REPLICATION, SEED);
+
+    SnapshotTestHelper.createSnapshot(hdfs, root, "s0");
+    printAtime(filePreSS, root, "s0");
+    printAtime(dirPreSS, root, "s0");
+
+    // items created after creating the first snapshot
+    DFSTestUtil.createFile(hdfs, filePostSS, BLOCKSIZE, REPLICATION, SEED);
+    DFSTestUtil.createFile(hdfs, dirPostSSChild, BLOCKSIZE, REPLICATION, SEED);
+
+    Thread.sleep(3000);
+    long now = Time.now();
+    hdfs.setTimes(filePreSS, -1, now);
+    hdfs.setTimes(filePostSS, -1, now);
+    hdfs.setTimes(dirPreSS, -1, now);
+    hdfs.setTimes(dirPostSS, -1, now);
+
+    SnapshotTestHelper.createSnapshot(hdfs, root, "s1");
+    printAtime(filePreSS, root, "s1");
+    printAtime(dirPreSS, root, "s1");
+    printAtime(filePostSS, root, "s1");
+    printAtime(dirPostSS, root, "s1");
+
+    Thread.sleep(3000);
+    now = Time.now();
+    hdfs.setTimes(filePreSS, -1, now);
+    hdfs.setTimes(filePostSS, -1, now);
+    hdfs.setTimes(dirPreSS, -1, now);
+    hdfs.setTimes(dirPostSS, -1, now);
+
+    SnapshotTestHelper.createSnapshot(hdfs, root, "s2");
+    printAtime(filePreSS, root, "s2");
+    printAtime(dirPreSS, root, "s2");
+    printAtime(filePostSS, root, "s2");
+    printAtime(dirPostSS, root, "s2");
+
+    Thread.sleep(3000);
+    now = Time.now();
+    // modify filePostSS, and change access time
+    hdfs.setReplication(filePostSS, (short) (REPLICATION - 1));
+    hdfs.setTimes(filePostSS, -1, now);
+    SnapshotTestHelper.createSnapshot(hdfs, root, "s3");
+
+    LOG.info("\nsnapshotDiff s0 -> s1:");
+    LOG.info(hdfs.getSnapshotDiffReport(root, "s0", "s1").toString());
+    LOG.info("\nsnapshotDiff s1 -> s2:");
+    LOG.info(hdfs.getSnapshotDiffReport(root, "s1", "s2").toString());
+
+    assertAtimeEquals(filePreSS, root, "s0", "s1");
+    assertAtimeEquals(dirPreSS, root, "s0", "s1");
+
+    assertAtimeEquals(filePreSS, root, "s1", "s2");
+    assertAtimeEquals(dirPreSS, root, "s1", "s2");
+
+    assertAtimeEquals(filePostSS, root, "s1", "s2");
+    assertAtimeEquals(dirPostSS, root, "s1", "s2");
+
+    // access time should be captured in snapshot due to
+    // other modification
+    assertAtimeNotEquals(filePostSS, root, "s2", "s3");
+
+    // restart NN, and see the access time relationship
+    // still stands (no change caused by edit logs
+    // loading)
+    cluster.restartNameNodes();
+    cluster.waitActive();
+    assertAtimeEquals(filePreSS, root, "s0", "s1");
+    assertAtimeEquals(dirPreSS, root, "s0", "s1");
+
+    assertAtimeEquals(filePreSS, root, "s1", "s2");
+    assertAtimeEquals(dirPreSS, root, "s1", "s2");
+
+    assertAtimeEquals(filePostSS, root, "s1", "s2");
+    assertAtimeEquals(dirPostSS, root, "s1", "s2");
+
+    assertAtimeNotEquals(filePostSS, root, "s2", "s3");
+  }
 }
\ No newline at end of file
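
Usage note (not part of the patch): the sketch below is one way to exercise the new setting end-to-end on a MiniDFSCluster, change only a file's access time between two snapshots, and inspect the resulting snapshot diff report. The class name SkipAtimeOnlyChangeExample and the paths are illustrative assumptions; the configuration mirrors the setUp() changes made to TestSnapshotDiffReport above, and all other APIs used (MiniDFSCluster, DistributedFileSystem, SnapshotDiffReport) come from the existing HDFS code base.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;

public class SkipAtimeOnlyChangeExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Mirror the test setup above: enable the new flag and use a very small
    // access time precision.
    conf.setBoolean(
        DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE,
        true);
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 1);

    MiniDFSCluster cluster =
        new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    try {
      cluster.waitActive();
      DistributedFileSystem dfs = cluster.getFileSystem();

      Path dir = new Path("/snapDir");
      Path file = new Path(dir, "file");
      try (FSDataOutputStream out = dfs.create(file)) {
        out.writeBytes("some data");
      }

      dfs.allowSnapshot(dir);
      dfs.createSnapshot(dir, "s0");

      // Access-time-only change between the two snapshots.
      dfs.setTimes(file, -1, System.currentTimeMillis());
      dfs.createSnapshot(dir, "s1");

      // With the flag enabled, the access-time-only change is not recorded
      // against the file, so it should not surface as a modification in the
      // report printed here.
      SnapshotDiffReport diff = dfs.getSnapshotDiffReport(dir, "s0", "s1");
      System.out.println(diff.toString());
    } finally {
      cluster.shutdown();
    }
  }
}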