From 9ff3836a367737d6dfcb12f50c8bd2f1b2233e37 Mon Sep 17 00:00:00 2001
From: Aaron Myers
Date: Thu, 19 Jun 2014 19:39:29 +0000
Subject: [PATCH] HDFS-6549. Add support for accessing the NFS gateway from
 the AIX NFS client. Contributed by Aaron T. Myers.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1604022 13f79535-47bb-0310-9956-ffa450edef68
---
 .../hadoop/hdfs/nfs/conf/NfsConfigKeys.java   |  4 +-
 .../hadoop/hdfs/nfs/nfs3/OpenFileCtx.java     | 36 +++++++++++---
 .../hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java  | 49 ++++++++++++++++---
 .../hadoop/hdfs/nfs/nfs3/WriteManager.java    |  7 ++-
 .../hadoop/hdfs/nfs/nfs3/TestWrites.java      | 25 +++++++++-
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt   |  3 ++
 .../src/site/apt/HdfsNfsGateway.apt.vm        | 19 +++++++
 7 files changed, 124 insertions(+), 19 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/conf/NfsConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/conf/NfsConfigKeys.java
index d1543b8a08..2f65ce4cba 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/conf/NfsConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/conf/NfsConfigKeys.java
@@ -55,4 +55,6 @@ public class NfsConfigKeys {
 
   public static final String DFS_NFS_PORT_MONITORING_DISABLED_KEY = "nfs.port.monitoring.disabled";
   public static final boolean DFS_NFS_PORT_MONITORING_DISABLED_DEFAULT = true;
-}
\ No newline at end of file
+  public static final String AIX_COMPAT_MODE_KEY = "nfs.aix.compatibility.mode.enabled";
+  public static final boolean AIX_COMPAT_MODE_DEFAULT = false;
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java
index e2ab31787b..cf44af5675 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java
@@ -95,6 +95,7 @@ class OpenFileCtx {
    */
   private AtomicLong nextOffset;
   private final HdfsDataOutputStream fos;
+  private final boolean aixCompatMode;
 
   // It's updated after each sync to HDFS
   private Nfs3FileAttributes latestAttr;
@@ -199,8 +200,15 @@ class OpenFileCtx {
 
   OpenFileCtx(HdfsDataOutputStream fos, Nfs3FileAttributes latestAttr,
       String dumpFilePath, DFSClient client, IdUserGroup iug) {
+    this(fos, latestAttr, dumpFilePath, client, iug, false);
+  }
+
+  OpenFileCtx(HdfsDataOutputStream fos, Nfs3FileAttributes latestAttr,
+      String dumpFilePath, DFSClient client, IdUserGroup iug,
+      boolean aixCompatMode) {
     this.fos = fos;
     this.latestAttr = latestAttr;
+    this.aixCompatMode = aixCompatMode;
     // We use the ReverseComparatorOnMin as the comparator of the map. In this
     // way, we first dump the data with larger offset. In the meanwhile, we
     // retrieve the last element to write back to HDFS.
@@ -780,15 +788,29 @@ class OpenFileCtx {
     }
 
     if (commitOffset > 0) {
-      if (commitOffset > flushed) {
-        if (!fromRead) {
-          CommitCtx commitCtx = new CommitCtx(commitOffset, channel, xid,
-              preOpAttr);
-          pendingCommits.put(commitOffset, commitCtx);
+      if (aixCompatMode) {
+        // The AIX NFS client misinterprets RFC-1813 and will always send 4096
+        // for the commitOffset, even if fewer bytes than that have ever (or
+        // will ever) be sent by the client. So, in AIX compatibility mode, we
+        // always DO_SYNC if the bytes to commit have already all been
+        // flushed; otherwise we fall through to the logic below, which checks
+        // for pending writes in the case that we're being asked to commit
+        // more bytes than have so far been flushed. See HDFS-6549 for more
+        // info.
+        if (commitOffset <= flushed) {
+          return COMMIT_STATUS.COMMIT_DO_SYNC;
         }
-        return COMMIT_STATUS.COMMIT_WAIT;
       } else {
-        return COMMIT_STATUS.COMMIT_DO_SYNC;
+        if (commitOffset > flushed) {
+          if (!fromRead) {
+            CommitCtx commitCtx = new CommitCtx(commitOffset, channel, xid,
+                preOpAttr);
+            pendingCommits.put(commitOffset, commitCtx);
+          }
+          return COMMIT_STATUS.COMMIT_WAIT;
+        } else {
+          return COMMIT_STATUS.COMMIT_DO_SYNC;
+        }
       }
     }
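For readers skimming the OpenFileCtx hunk above, the new commit decision can be condensed into a small standalone sketch. This is an illustration only, not code from the patch: the class and method names are hypothetical, and the CommitCtx queuing and fromRead handling are elided.

// A condensed, hypothetical model of OpenFileCtx#checkCommitInternal();
// "flushed" is the stream's flush position (fos.getPos() in the real code).
class CommitDecisionSketch {
  enum CommitStatus { COMMIT_DO_SYNC, COMMIT_WAIT, COMMIT_FINISHED }

  static CommitStatus decide(long commitOffset, long flushed,
      boolean hasPendingWrites, boolean aixCompatMode) {
    if (commitOffset > 0) {
      if (aixCompatMode) {
        // AIX always sends 4096 as the commit offset, so sync as soon as
        // that many bytes have been flushed; otherwise fall through to the
        // pending-write check instead of waiting on an offset that may
        // never be written.
        if (commitOffset <= flushed) {
          return CommitStatus.COMMIT_DO_SYNC;
        }
      } else {
        // Standard behavior: wait until the requested offset is flushed.
        return commitOffset > flushed
            ? CommitStatus.COMMIT_WAIT : CommitStatus.COMMIT_DO_SYNC;
      }
    }
    // commitOffset == 0, or the AIX fall-through case.
    return hasPendingWrites
        ? CommitStatus.COMMIT_WAIT : CommitStatus.COMMIT_FINISHED;
  }
}

This mirrors the two assertions in testCheckCommitAixCompatMode further down: with 2 bytes flushed, an AIX commit of 5 falls through and finishes (no pending writes); with 10 bytes flushed, it returns COMMIT_DO_SYNC.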
diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java
index 4fc14ba1a0..446e722a21 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java
@@ -153,6 +153,7 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface {
   private final short replication;
   private final long blockSize;
   private final int bufferSize;
+  private final boolean aixCompatMode;
   private Statistics statistics;
   private String writeDumpDir; // The dir save dump files
 
@@ -170,8 +171,11 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface {
     config.set(FsPermission.UMASK_LABEL, "000");
     iug = new IdUserGroup(config);
 
+    aixCompatMode = config.getBoolean(
+        NfsConfigKeys.AIX_COMPAT_MODE_KEY,
+        NfsConfigKeys.AIX_COMPAT_MODE_DEFAULT);
     exports = NfsExports.getInstance(config);
-    writeManager = new WriteManager(iug, config);
+    writeManager = new WriteManager(iug, config, aixCompatMode);
     clientCache = new DFSClientCache(config);
     replication = (short) config.getInt(DFSConfigKeys.DFS_REPLICATION_KEY,
         DFSConfigKeys.DFS_REPLICATION_DEFAULT);
@@ -900,7 +904,8 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface {
 
     // Add open stream
     OpenFileCtx openFileCtx = new OpenFileCtx(fos, postOpObjAttr,
-        writeDumpDir + "/" + postOpObjAttr.getFileId(), dfsClient, iug);
+        writeDumpDir + "/" + postOpObjAttr.getFileId(), dfsClient, iug,
+        aixCompatMode);
     fileHandle = new FileHandle(postOpObjAttr.getFileId());
     if (!writeManager.addOpenFileStream(fileHandle, openFileCtx)) {
       LOG.warn("Can't add more stream, close it."
@@ -1438,9 +1443,24 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface {
     }
     long cookieVerf = request.getCookieVerf();
     if ((cookieVerf != 0) && (cookieVerf != dirStatus.getModificationTime())) {
-      LOG.error("CookierVerf mismatch. request cookierVerf:" + cookieVerf
-          + " dir cookieVerf:" + dirStatus.getModificationTime());
-      return new READDIR3Response(Nfs3Status.NFS3ERR_BAD_COOKIE);
+      if (aixCompatMode) {
+        // The AIX NFS client misinterprets RFC-1813 and will repeatedly send
+        // the same cookieverf value even across VFS-level readdir calls,
+        // instead of getting a new cookieverf for every VFS-level readdir
+        // call and reusing the cookieverf only in the event that multiple
+        // incremental NFS-level readdir calls must be made to fetch all of
+        // the directory entries. This means that whenever a readdir call is
+        // made by an AIX NFS client for a given directory, and that directory
+        // is subsequently modified, thus changing its mtime, no later readdir
+        // calls will succeed from AIX for that directory until the FS is
+        // unmounted/remounted. See HDFS-6549 for more info.
+        LOG.warn("AIX compatibility mode enabled, ignoring cookieverf "
+            + "mismatches.");
+      } else {
+        LOG.error("CookieVerf mismatch. request cookieVerf: " + cookieVerf
+            + " dir cookieVerf: " + dirStatus.getModificationTime());
+        return new READDIR3Response(Nfs3Status.NFS3ERR_BAD_COOKIE);
+      }
     }
 
     if (cookie == 0) {
@@ -1588,9 +1608,22 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface {
     }
     long cookieVerf = request.getCookieVerf();
     if ((cookieVerf != 0) && (cookieVerf != dirStatus.getModificationTime())) {
-      LOG.error("CookierVerf mismatch. request cookierVerf:" + cookieVerf
-          + " dir cookieVerf:" + dirStatus.getModificationTime());
-      return new READDIRPLUS3Response(Nfs3Status.NFS3ERR_BAD_COOKIE);
+      if (aixCompatMode) {
+        // The AIX NFS client misinterprets RFC-1813 and will repeatedly send
+        // the same cookieverf value even across VFS-level readdir calls,
+        // instead of getting a new cookieverf for every VFS-level readdir
+        // call. This means that whenever a readdir call is made by an AIX NFS
+        // client for a given directory, and that directory is subsequently
+        // modified, thus changing its mtime, no later readdir calls will
+        // succeed for that directory from AIX until the FS is
+        // unmounted/remounted. See HDFS-6549 for more info.
+        LOG.warn("AIX compatibility mode enabled, ignoring cookieverf "
+            + "mismatches.");
+      } else {
+        LOG.error("CookieVerf mismatch. request cookieVerf: " + cookieVerf
+            + " dir cookieVerf: " + dirStatus.getModificationTime());
+        return new READDIRPLUS3Response(Nfs3Status.NFS3ERR_BAD_COOKIE);
+      }
     }
 
     if (cookie == 0) {
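The READDIR and READDIRPLUS hunks above relax the same verifier check. Factored into a single predicate, the behavior is roughly the following; the helper and class names are hypothetical, a sketch of the branch logic rather than code from the patch.

// Hypothetical helper summarizing the cookieverf check that READDIR and
// READDIRPLUS now share; not code from the patch.
class CookieVerfSketch {
  static boolean cookieVerfAcceptable(long requestVerf, long dirMtime,
      boolean aixCompatMode) {
    // A verifier of 0 (first call in a sequence) or one matching the
    // directory's current mtime is always accepted.
    if (requestVerf == 0 || requestVerf == dirMtime) {
      return true;
    }
    // AIX reuses a stale verifier across VFS-level readdir calls, so in
    // compatibility mode a mismatch is logged and tolerated rather than
    // answered with NFS3ERR_BAD_COOKIE.
    return aixCompatMode;
  }
}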
diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteManager.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteManager.java
index 2cd8b22c9d..5f2ded744d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteManager.java
@@ -58,6 +58,7 @@ public class WriteManager {
   private boolean asyncDataServiceStarted = false;
 
   private final int maxStreams;
+  private final boolean aixCompatMode;
 
   /**
    * The time limit to wait for accumulate reordered sequential writes to the
@@ -79,9 +80,11 @@ public class WriteManager {
     return fileContextCache.put(h, ctx);
   }
 
-  WriteManager(IdUserGroup iug, final NfsConfiguration config) {
+  WriteManager(IdUserGroup iug, final NfsConfiguration config,
+      boolean aixCompatMode) {
     this.iug = iug;
     this.config = config;
+    this.aixCompatMode = aixCompatMode;
     streamTimeout = config.getLong(NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_KEY,
         NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_DEFAULT);
     LOG.info("Stream timeout is " + streamTimeout + "ms.");
@@ -175,7 +178,7 @@ public class WriteManager {
       String writeDumpDir = config.get(NfsConfigKeys.DFS_NFS_FILE_DUMP_DIR_KEY,
           NfsConfigKeys.DFS_NFS_FILE_DUMP_DIR_DEFAULT);
       openFileCtx = new OpenFileCtx(fos, latestAttr, writeDumpDir + "/"
-          + fileHandle.getFileId(), dfsClient, iug);
+          + fileHandle.getFileId(), dfsClient, iug, aixCompatMode);
 
       if (!addOpenFileStream(fileHandle, openFileCtx)) {
         LOG.info("Can't add new stream. Close it. Tell client to retry.");
Tell client to retry."); diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestWrites.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestWrites.java index 90cfc54feb..3945b298f5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestWrites.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestWrites.java @@ -190,6 +190,29 @@ public class TestWrites { ret = ctx.checkCommit(dfsClient, 0, ch, 1, attr, false); Assert.assertTrue(ret == COMMIT_STATUS.COMMIT_FINISHED); } + + @Test + public void testCheckCommitAixCompatMode() throws IOException { + DFSClient dfsClient = Mockito.mock(DFSClient.class); + Nfs3FileAttributes attr = new Nfs3FileAttributes(); + HdfsDataOutputStream fos = Mockito.mock(HdfsDataOutputStream.class); + + // Last argument "true" here to enable AIX compatibility mode. + OpenFileCtx ctx = new OpenFileCtx(fos, attr, "/dumpFilePath", dfsClient, + new IdUserGroup(new NfsConfiguration()), true); + + // Test fall-through to pendingWrites check in the event that commitOffset + // is greater than the number of bytes we've so far flushed. + Mockito.when(fos.getPos()).thenReturn((long) 2); + COMMIT_STATUS status = ctx.checkCommitInternal(5, null, 1, attr, false); + Assert.assertTrue(status == COMMIT_STATUS.COMMIT_FINISHED); + + // Test the case when we actually have received more bytes than we're trying + // to commit. + Mockito.when(fos.getPos()).thenReturn((long) 10); + status = ctx.checkCommitInternal(5, null, 1, attr, false); + Assert.assertTrue(status == COMMIT_STATUS.COMMIT_DO_SYNC); + } @Test // Validate all the commit check return codes OpenFileCtx.COMMIT_STATUS, which @@ -207,7 +230,7 @@ public class TestWrites { FileHandle h = new FileHandle(1); // fake handle for "/dumpFilePath" COMMIT_STATUS ret; - WriteManager wm = new WriteManager(new IdUserGroup(config), config); + WriteManager wm = new WriteManager(new IdUserGroup(config), config, false); assertTrue(wm.addOpenFileStream(h, ctx)); // Test inactive open file context diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 86f193074b..0a717773c0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -673,6 +673,9 @@ Release 2.5.0 - UNRELEASED HDFS-3848. A Bug in recoverLeaseInternal method of FSNameSystem class (Hooman Peiro Sajjad and Chen He via kihwal) + HDFS-6549. Add support for accessing the NFS gateway from the AIX NFS + client. (atm) + BREAKDOWN OF HDFS-2006 SUBTASKS AND RELATED JIRAS HDFS-6299. Protobuf for XAttr and client-side implementation. (Yi Liu via umamahesh) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsNfsGateway.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsNfsGateway.apt.vm index 09cdb5819d..54544cff46 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsNfsGateway.apt.vm +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsNfsGateway.apt.vm @@ -88,6 +88,25 @@ HDFS NFS Gateway ---- + The AIX NFS client has a {{{https://issues.apache.org/jira/browse/HDFS-6549}few known issues}} + that prevent it from working correctly by default with the HDFS NFS + Gateway. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsNfsGateway.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsNfsGateway.apt.vm
index 09cdb5819d..54544cff46 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsNfsGateway.apt.vm
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsNfsGateway.apt.vm
@@ -88,6 +88,25 @@ HDFS NFS Gateway
 ----
 
+  The AIX NFS client has a {{{https://issues.apache.org/jira/browse/HDFS-6549}few known issues}}
+  that prevent it from working correctly by default with the HDFS NFS
+  Gateway. If you want to be able to access the HDFS NFS Gateway from AIX,
+  you should set the following configuration setting to enable work-arounds
+  for these issues:
+
+----
+<property>
+  <name>nfs.aix.compatibility.mode.enabled</name>
+  <value>true</value>
+</property>
+----
+
+  Note that regular, non-AIX clients should NOT enable AIX compatibility
+  mode. The work-arounds implemented by AIX compatibility mode effectively
+  disable safeguards that ensure that directory listings via NFS return
+  consistent results and that all data sent to the NFS server has actually
+  been committed.
+
   It's strongly recommended for the users to update a few configuration properties based on their use
   cases. All the related configuration properties can be added or updated in hdfs-site.xml.
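As a closing illustration (an addition of this write-up, not part of the patch), the same switch can be set programmatically through NfsConfiguration, for example when embedding the gateway in a test; the key and default come from the NfsConfigKeys change above.

import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys;
import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;

public class AixCompatModeExample {
  public static void main(String[] args) {
    NfsConfiguration conf = new NfsConfiguration();
    // Equivalent to the hdfs-site.xml property shown in the documentation.
    conf.setBoolean(NfsConfigKeys.AIX_COMPAT_MODE_KEY, true);

    // RpcProgramNfs3 reads the flag back in exactly this way.
    boolean aixCompatMode = conf.getBoolean(NfsConfigKeys.AIX_COMPAT_MODE_KEY,
        NfsConfigKeys.AIX_COMPAT_MODE_DEFAULT);
    System.out.println("AIX compatibility mode enabled: " + aixCompatMode);
  }
}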