diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index b836ff8215..17cabadba2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -357,6 +357,12 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final boolean DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES_DEFAULT =
HdfsClientConfigKeys.DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES_DEFAULT;
+  public static final String
+      DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE =
+      "dfs.namenode.snapshot.skip.capture.accesstime-only-change";
+  public static final boolean
+      DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE_DEFAULT = false;
+
// Whether to enable datanode's stale state detection and usage for reads
public static final String DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_KEY = "dfs.namenode.avoid.read.stale.datanode";
public static final boolean DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_DEFAULT = false;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAttrOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAttrOp.java
index d4b24f5971..0dfaa8e7d0 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAttrOp.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAttrOp.java
@@ -494,7 +494,9 @@ static boolean unprotectedSetTimes(
// then no need to store access time
if (atime != -1 && (status || force
|| atime > inode.getAccessTime() + fsd.getAccessTimePrecision())) {
- inode.setAccessTime(atime, latest);
+ inode.setAccessTime(atime, latest,
+ fsd.getFSNamesystem().getSnapshotManager().
+ getSkipCaptureAccessTimeOnlyChange());
status = true;
}
return status;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
index 060bd59793..bc62a7ef66 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
@@ -413,7 +413,7 @@ private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir,
// update the block list.
// Update the salient file attributes.
- newFile.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID);
+ newFile.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID, false);
newFile.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID);
ErasureCodingPolicy ecPolicy =
FSDirErasureCodingOp.unprotectedGetErasureCodingPolicy(
@@ -436,7 +436,7 @@ private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir,
final INodeFile file = INodeFile.valueOf(iip.getLastINode(), path);
// Update the salient file attributes.
- file.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID);
+ file.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID, false);
file.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID);
ErasureCodingPolicy ecPolicy =
FSDirErasureCodingOp.unprotectedGetErasureCodingPolicy(
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java
index d768e08558..874563db9f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java
@@ -724,8 +724,11 @@ public final long getAccessTime() {
/**
* Set last access time of inode.
*/
- public final INode setAccessTime(long accessTime, int latestSnapshotId) {
- recordModification(latestSnapshotId);
+ public final INode setAccessTime(long accessTime, int latestSnapshotId,
+ boolean skipCaptureAccessTimeOnlyChangeInSnapshot) {
+ if (!skipCaptureAccessTimeOnlyChangeInSnapshot) {
+ recordModification(latestSnapshotId);
+ }
setAccessTime(accessTime);
return this;
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java
index 4b479e04d8..2fef2358c8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java
@@ -19,6 +19,8 @@
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE_DEFAULT;
import java.io.DataInput;
import java.io.DataOutput;
@@ -33,6 +35,8 @@
import javax.management.ObjectName;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.DFSUtilClient;
@@ -67,8 +71,19 @@
* if necessary.
*/
public class SnapshotManager implements SnapshotStatsMXBean {
+ public static final Log LOG = LogFactory.getLog(SnapshotManager.class);
+
private final FSDirectory fsdir;
private final boolean captureOpenFiles;
+  /**
+   * If skipCaptureAccessTimeOnlyChange is set to true and the access time
+   * of a file changed but no other modification was made to the file, the
+   * change will not be captured in the next snapshot. However, if the file
+   * was modified in any other way, the latest access time will be captured
+   * in the next snapshot together with that modification.
+   */
+  private final boolean skipCaptureAccessTimeOnlyChange;
+
private final AtomicInteger numSnapshots = new AtomicInteger();
private static final int SNAPSHOT_ID_BIT_WIDTH = 24;
@@ -84,6 +99,19 @@ public SnapshotManager(final Configuration conf, final FSDirectory fsdir) {
this.captureOpenFiles = conf.getBoolean(
DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES,
DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES_DEFAULT);
+ this.skipCaptureAccessTimeOnlyChange = conf.getBoolean(
+ DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE,
+ DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE_DEFAULT);
+ LOG.info("Loaded config captureOpenFiles: " + captureOpenFiles
+ + "skipCaptureAccessTimeOnlyChange: " +
+ skipCaptureAccessTimeOnlyChange);
+ }
+
+ /**
+ * @return skipCaptureAccessTimeOnlyChange
+ */
+ public boolean getSkipCaptureAccessTimeOnlyChange() {
+ return skipCaptureAccessTimeOnlyChange;
}
/** Used in tests only */
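Note: the flag is read once in the SnapshotManager constructor, so it only takes
effect at NameNode startup. Below is a minimal, illustrative sketch of enabling it
programmatically around a MiniDFSCluster (much as the test change further down does);
the class name and cluster setup are assumptions for illustration, not part of the
patch. On a real cluster the same key would be set in hdfs-site.xml before restarting
the NameNode.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;
    import org.apache.hadoop.hdfs.HdfsConfiguration;
    import org.apache.hadoop.hdfs.MiniDFSCluster;

    public class EnableSkipAtimeOnlyCapture {
      public static void main(String[] args) throws Exception {
        Configuration conf = new HdfsConfiguration();
        // Must be set before the NameNode starts; SnapshotManager reads it once.
        conf.setBoolean(
            DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE,
            true);
        MiniDFSCluster cluster =
            new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
        try {
          cluster.waitActive();
          // ... take snapshots and update access times via cluster.getFileSystem() ...
        } finally {
          cluster.shutdown();
        }
      }
    }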
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index 2a42b5ce0f..bbe2eca00b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -4259,22 +4259,34 @@
   </description>
 </property>
 
-<property>
-<name>dfs.namenode.snapshot.capture.openfiles</name>
-<value>false</value>
-<description>
-  If true, snapshots taken will have an immutable shared copy of
-  the open files that have valid leases. Even after the open files
-  grow or shrink in size, snapshot will always have the previous
-  point-in-time version of the open files, just like all other
-  closed files. Default is false.
-  Note: The file length captured for open files in snapshot is
-  whats recorded in NameNode at the time of snapshot and it may
-  be shorter than what the client has written till then. In order
-  to capture the latest length, the client can call hflush/hsync
-  with the flag SyncFlag.UPDATE_LENGTH on the open files handles.
-</description>
-</property>
+<property>
+  <name>dfs.namenode.snapshot.capture.openfiles</name>
+  <value>false</value>
+  <description>
+    If true, snapshots taken will have an immutable shared copy of
+    the open files that have valid leases. Even after the open files
+    grow or shrink in size, snapshot will always have the previous
+    point-in-time version of the open files, just like all other
+    closed files. Default is false.
+    Note: The file length captured for open files in snapshot is
+    what is recorded in NameNode at the time of snapshot and it may
+    be shorter than what the client has written till then. In order
+    to capture the latest length, the client can call hflush/hsync
+    with the flag SyncFlag.UPDATE_LENGTH on the open file handles.
+  </description>
+</property>
+
+<property>
+  <name>dfs.namenode.snapshot.skip.capture.accesstime-only-change</name>
+  <value>false</value>
+  <description>
+    If the access time of a file/directory changed but there is no other
+    modification made to the file/directory, the changed access time will
+    not be captured in the next snapshot. However, if there is any other
+    modification made to the file/directory, the latest access time will
+    be captured together with that modification in the next snapshot.
+  </description>
+</property>
 
 <property>
   <name>dfs.pipeline.ecn</name>
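To make the new description concrete, here is a hedged sketch of the observable
behavior when the property is set to true; "fs", "dir" and "file" stand for an
existing DistributedFileSystem, a snapshottable directory and a file under it, and
the class and method names are illustrative only (the new test below exercises the
same scenario end to end):

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hdfs.DistributedFileSystem;
    import org.apache.hadoop.util.Time;

    public class AtimeOnlySnapshotSketch {
      static void demo(DistributedFileSystem fs, Path dir, Path file)
          throws Exception {
        fs.createSnapshot(dir, "s1");
        // Access-time-only change: mtime is left untouched (-1), atime is bumped.
        fs.setTimes(file, -1, Time.now());
        fs.createSnapshot(dir, "s2");
        // With the property enabled, the file shows the same access time under the
        // s1 and s2 snapshot paths and the s1 -> s2 diff report does not list it.
        // With the property disabled (the default), the access time change alone
        // is enough to record the file as modified in the snapshot diff.
      }
    }

The same comparison can also be made from the shell with the existing
"hdfs snapshotDiff <snapshottableDir> s1 s2" command.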
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirAttrOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirAttrOp.java
index 44cf57a33b..bdd48e873c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirAttrOp.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirAttrOp.java
@@ -21,6 +21,8 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager;
import org.junit.Test;
import org.mockito.Mockito;
@@ -36,10 +38,15 @@ public class TestFSDirAttrOp {
private boolean unprotectedSetTimes(long atime, long atime0, long precision,
long mtime, boolean force) throws QuotaExceededException {
+ FSNamesystem fsn = Mockito.mock(FSNamesystem.class);
+ SnapshotManager ssMgr = Mockito.mock(SnapshotManager.class);
FSDirectory fsd = Mockito.mock(FSDirectory.class);
INodesInPath iip = Mockito.mock(INodesInPath.class);
INode inode = Mockito.mock(INode.class);
+ when(fsd.getFSNamesystem()).thenReturn(fsn);
+ when(fsn.getSnapshotManager()).thenReturn(ssMgr);
+ when(ssMgr.getSkipCaptureAccessTimeOnlyChange()).thenReturn(false);
when(fsd.getAccessTimePrecision()).thenReturn(precision);
when(fsd.hasWriteLock()).thenReturn(Boolean.TRUE);
when(iip.getLastINode()).thenReturn(inode);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDiffReport.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDiffReport.java
index b9451bc05d..4654486f35 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDiffReport.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDiffReport.java
@@ -18,10 +18,13 @@
package org.apache.hadoop.hdfs.server.namenode.snapshot;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Random;
@@ -43,15 +46,21 @@
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.util.Time;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Tests snapshot deletion.
*/
public class TestSnapshotDiffReport {
+ private static final Logger LOG =
+ LoggerFactory.getLogger(TestSnapshotDiffReport.class);
+
private static final long SEED = 0;
private static final short REPLICATION = 3;
private static final short REPLICATION_1 = 2;
@@ -73,6 +82,10 @@ public void setUp() throws Exception {
conf = new Configuration();
conf.setBoolean(
DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES, true);
+ conf.setLong(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 1);
+ conf.setBoolean(
+ DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE,
+ true);
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION)
.format(true).build();
cluster.waitActive();
@@ -167,8 +180,8 @@ private void verifyDiffReport(Path dir, String from, String to,
// reverse the order of from and to
SnapshotDiffReport inverseReport = hdfs
.getSnapshotDiffReport(dir, to, from);
- System.out.println(report.toString());
- System.out.println(inverseReport.toString() + "\n");
+ LOG.info(report.toString());
+ LOG.info(inverseReport.toString() + "\n");
assertEquals(entries.length, report.getDiffList().size());
assertEquals(entries.length, inverseReport.getDiffList().size());
@@ -221,20 +234,20 @@ public void testDiffReport() throws Exception {
// diff between the same snapshot
SnapshotDiffReport report = hdfs.getSnapshotDiffReport(sub1, "s0", "s0");
- System.out.println(report);
+ LOG.info(report.toString());
assertEquals(0, report.getDiffList().size());
report = hdfs.getSnapshotDiffReport(sub1, "", "");
- System.out.println(report);
+ LOG.info(report.toString());
assertEquals(0, report.getDiffList().size());
report = hdfs.getSnapshotDiffReport(subsubsub1, "s0", "s2");
- System.out.println(report);
+ LOG.info(report.toString());
assertEquals(0, report.getDiffList().size());
// test path with scheme also works
report = hdfs.getSnapshotDiffReport(hdfs.makeQualified(subsubsub1), "s0", "s2");
- System.out.println(report);
+ LOG.info(report.toString());
assertEquals(0, report.getDiffList().size());
verifyDiffReport(sub1, "s0", "s2",
@@ -677,4 +690,142 @@ public void testDiffReportWithOpenFiles() throws Exception {
}
+ private long getAccessTime(Path path) throws IOException {
+ return hdfs.getFileStatus(path).getAccessTime();
+ }
+
+ private String getAccessTimeStr(Path path) throws IOException {
+ SimpleDateFormat timeFmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ return timeFmt.format(new Date(getAccessTime(path)));
+ }
+
+ private Path getSSpath(Path path, Path ssRoot, String ssName) {
+ return new Path(ssRoot, ".snapshot/" + ssName + "/" +
+ path.toString().substring(ssRoot.toString().length()));
+ }
+
+ private void printAtime(Path path, Path ssRoot, String ssName)
+ throws IOException {
+ Path ssPath = getSSpath(path, ssRoot, ssName);
+ LOG.info("Access time "
+ + path + ": " + getAccessTimeStr(path)
+ + " " + ssPath + ": " + getAccessTimeStr(ssPath));
+ }
+
+ private void assertAtimeEquals(Path path, Path ssRoot,
+ String ssName1, String ssName2)
+ throws IOException {
+ Path ssPath1 = getSSpath(path, ssRoot, ssName1);
+ Path ssPath2 = getSSpath(path, ssRoot, ssName2);
+ assertEquals(getAccessTime(ssPath1), getAccessTime(ssPath2));
+ }
+
+ private void assertAtimeNotEquals(Path path, Path ssRoot,
+ String ssName1, String ssName2)
+ throws IOException {
+ Path ssPath1 = getSSpath(path, ssRoot, ssName1);
+ Path ssPath2 = getSSpath(path, ssRoot, ssName2);
+ assertNotEquals(getAccessTime(ssPath1), getAccessTime(ssPath2));
+ }
+
+  /**
+   * Check that an access-time-only change is not captured in a snapshot
+   * when DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE is true:
+   * if a file's access time changed between two snapshots but the file has
+   * no other modification, the access time change is not captured; if it
+   * does have another modification, the access time is captured with it.
+   */
+ @Test
+ public void testDontCaptureAccessTimeOnlyChangeReport() throws Exception {
+ final Path froot = new Path("/");
+ final Path root = new Path(froot, "/testSdiffCalc");
+
+ // items created pre enabling snapshot
+ final Path filePreSS = new Path(root, "fParent/filePreSS");
+ final Path dirPreSS = new Path(root, "dirPreSS");
+ final Path dirPreSSChild = new Path(dirPreSS, "dirPreSSChild");
+
+ // items created after enabling snapshot
+ final Path filePostSS = new Path(root, "fParent/filePostSS");
+ final Path dirPostSS = new Path(root, "dirPostSS");
+ final Path dirPostSSChild = new Path(dirPostSS, "dirPostSSChild");
+
+ DFSTestUtil.createFile(hdfs, filePreSS, BLOCKSIZE, REPLICATION, SEED);
+ DFSTestUtil.createFile(hdfs, dirPreSSChild, BLOCKSIZE, REPLICATION, SEED);
+
+ SnapshotTestHelper.createSnapshot(hdfs, root, "s0");
+ printAtime(filePreSS, root, "s0");
+ printAtime(dirPreSS, root, "s0");
+
+ // items created after creating the first snapshot
+ DFSTestUtil.createFile(hdfs, filePostSS, BLOCKSIZE, REPLICATION, SEED);
+ DFSTestUtil.createFile(hdfs, dirPostSSChild, BLOCKSIZE, REPLICATION, SEED);
+
+ Thread.sleep(3000);
+ long now = Time.now();
+ hdfs.setTimes(filePreSS, -1, now);
+ hdfs.setTimes(filePostSS, -1, now);
+ hdfs.setTimes(dirPreSS, -1, now);
+ hdfs.setTimes(dirPostSS, -1, now);
+
+ SnapshotTestHelper.createSnapshot(hdfs, root, "s1");
+ printAtime(filePreSS, root, "s1");
+ printAtime(dirPreSS, root, "s1");
+ printAtime(filePostSS, root, "s1");
+ printAtime(dirPostSS, root, "s1");
+
+ Thread.sleep(3000);
+ now = Time.now();
+ hdfs.setTimes(filePreSS, -1, now);
+ hdfs.setTimes(filePostSS, -1, now);
+ hdfs.setTimes(dirPreSS, -1, now);
+ hdfs.setTimes(dirPostSS, -1, now);
+
+ SnapshotTestHelper.createSnapshot(hdfs, root, "s2");
+ printAtime(filePreSS, root, "s2");
+ printAtime(dirPreSS, root, "s2");
+ printAtime(filePostSS, root, "s2");
+ printAtime(dirPostSS, root, "s2");
+
+ Thread.sleep(3000);
+ now = Time.now();
+ // modify filePostSS, and change access time
+ hdfs.setReplication(filePostSS, (short) (REPLICATION - 1));
+ hdfs.setTimes(filePostSS, -1, now);
+ SnapshotTestHelper.createSnapshot(hdfs, root, "s3");
+
+ LOG.info("\nsnapshotDiff s0 -> s1:");
+ LOG.info(hdfs.getSnapshotDiffReport(root, "s0", "s1").toString());
+ LOG.info("\nsnapshotDiff s1 -> s2:");
+ LOG.info(hdfs.getSnapshotDiffReport(root, "s1", "s2").toString());
+
+ assertAtimeEquals(filePreSS, root, "s0", "s1");
+ assertAtimeEquals(dirPreSS, root, "s0", "s1");
+
+ assertAtimeEquals(filePreSS, root, "s1", "s2");
+ assertAtimeEquals(dirPreSS, root, "s1", "s2");
+
+ assertAtimeEquals(filePostSS, root, "s1", "s2");
+ assertAtimeEquals(dirPostSS, root, "s1", "s2");
+
+ // access time should be captured in snapshot due to
+ // other modification
+ assertAtimeNotEquals(filePostSS, root, "s2", "s3");
+
+    // Restart the NameNode and verify that the access time relationships
+    // above still hold, i.e. they are not changed by edit log replay
+    // during startup.
+ cluster.restartNameNodes();
+ cluster.waitActive();
+ assertAtimeEquals(filePreSS, root, "s0", "s1");
+ assertAtimeEquals(dirPreSS, root, "s0", "s1");
+
+ assertAtimeEquals(filePreSS, root, "s1", "s2");
+ assertAtimeEquals(dirPreSS, root, "s1", "s2");
+
+ assertAtimeEquals(filePostSS, root, "s1", "s2");
+ assertAtimeEquals(dirPostSS, root, "s1", "s2");
+
+ assertAtimeNotEquals(filePostSS, root, "s2", "s3");
+ }
}
\ No newline at end of file