diff --git a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Constant.java b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Constant.java
index b49aef462c..77e67a140f 100644
--- a/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Constant.java
+++ b/hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Constant.java
@@ -206,6 +206,12 @@ public static WriteStableHow fromValue(int id) {
public static final String FILE_DUMP_DIR_DEFAULT = "/tmp/.hdfs-nfs";
public static final String ENABLE_FILE_DUMP_KEY = "dfs.nfs3.enableDump";
public static final boolean ENABLE_FILE_DUMP_DEFAULT = true;
+ public static final String MAX_READ_TRANSFER_SIZE_KEY = "dfs.nfs.rtmax";
+ public static final int MAX_READ_TRANSFER_SIZE_DEFAULT = 1024 * 1024;
+ public static final String MAX_WRITE_TRANSFER_SIZE_KEY = "dfs.nfs.wtmax";
+ public static final int MAX_WRITE_TRANSFER_SIZE_DEFAULT = 1024 * 1024;
+ public static final String MAX_READDIR_TRANSFER_SIZE_KEY = "dfs.nfs.dtmax";
+ public static final int MAX_READDIR_TRANSFER_SIZE_DEFAULT = 64 * 1024;
public static final String MAX_OPEN_FILES = "dfs.nfs3.max.open.files";
public static final int MAX_OPEN_FILES_DEFAULT = 256;
public static final String OUTPUT_STREAM_TIMEOUT = "dfs.nfs3.stream.timeout";
diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java
index e809b72d60..f5fa03e7ee 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java
@@ -142,9 +142,6 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface {
(short) DEFAULT_UMASK);
static final Log LOG = LogFactory.getLog(RpcProgramNfs3.class);
- private static final int MAX_READ_TRANSFER_SIZE = 64 * 1024;
- private static final int MAX_WRITE_TRANSFER_SIZE = 64 * 1024;
- private static final int MAX_READDIR_TRANSFER_SIZE = 64 * 1024;
private final Configuration config = new Configuration();
private final WriteManager writeManager;
@@ -553,7 +550,11 @@ public READLINK3Response readlink(XDR xdr, SecurityHandler securityHandler,
+ handle.getFileId());
return new READLINK3Response(Nfs3Status.NFS3ERR_SERVERFAULT);
}
- if (MAX_READ_TRANSFER_SIZE < target.getBytes().length) {
+ int rtmax = config.getInt(Nfs3Constant.MAX_READ_TRANSFER_SIZE_KEY,
+ Nfs3Constant.MAX_READ_TRANSFER_SIZE_DEFAULT);
+ if (rtmax < target.getBytes().length) {
+ LOG.error("Link size: " + target.getBytes().length
+ + " is larger than max transfer size: " + rtmax);
return new READLINK3Response(Nfs3Status.NFS3ERR_IO, postOpAttr,
new byte[0]);
}
@@ -649,7 +650,9 @@ public READ3Response read(XDR xdr, SecurityHandler securityHandler,
}
try {
- int buffSize = Math.min(MAX_READ_TRANSFER_SIZE, count);
+ int rtmax = config.getInt(Nfs3Constant.MAX_READ_TRANSFER_SIZE_KEY,
+ Nfs3Constant.MAX_READ_TRANSFER_SIZE_DEFAULT);
+ int buffSize = Math.min(rtmax, count);
byte[] readbuffer = new byte[buffSize];
int readCount = 0;
@@ -1714,9 +1717,12 @@ public FSINFO3Response fsinfo(XDR xdr, SecurityHandler securityHandler,
}
try {
- int rtmax = MAX_READ_TRANSFER_SIZE;
- int wtmax = MAX_WRITE_TRANSFER_SIZE;
- int dtperf = MAX_READDIR_TRANSFER_SIZE;
+ int rtmax = config.getInt(Nfs3Constant.MAX_READ_TRANSFER_SIZE_KEY,
+ Nfs3Constant.MAX_READ_TRANSFER_SIZE_DEFAULT);
+ int wtmax = config.getInt(Nfs3Constant.MAX_WRITE_TRANSFER_SIZE_KEY,
+ Nfs3Constant.MAX_WRITE_TRANSFER_SIZE_DEFAULT);
+ int dtperf = config.getInt(Nfs3Constant.MAX_READDIR_TRANSFER_SIZE_KEY,
+ Nfs3Constant.MAX_READDIR_TRANSFER_SIZE_DEFAULT);
Nfs3FileAttributes attrs = Nfs3Utils.getFileAttr(dfsClient,
Nfs3Utils.getFileIdPath(handle), iug);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index b9346495f2..9eaaf67525 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -400,6 +400,9 @@ Release 2.4.0 - UNRELEASED
HDFS-6072. Clean up dead code of FSImage. (wheat9)
+ HDFS-6080. Improve NFS gateway performance by making rtmax and wtmax
+ configurable. (Abin Shahab via brandonli)
+
OPTIMIZATIONS
HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsNfsGateway.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsNfsGateway.apt.vm
index d99692d9ce..fe1b4dbbef 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsNfsGateway.apt.vm
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsNfsGateway.apt.vm
@@ -48,35 +48,48 @@ HDFS NFS Gateway
The user running the NFS-gateway must be able to proxy all the users using the NFS mounts.
For instance, if user 'nfsserver' is running the gateway, and users belonging to the groups 'nfs-users1'
- and 'nfs-users2' use the NFS mounts, then in core-site.xml of the namenode, the following must be set:
+ and 'nfs-users2' use the NFS mounts, then in core-site.xml of the namenode, the following must be set
+ (NOTE: replace 'nfsserver' with the name of the user that starts the gateway in your cluster):
+
----
hadoop.proxyuser.nfsserver.groups
nfs-users1,nfs-users2
- The 'nfsserver' user is allowed to proxy all members of the 'nfs-users1' and 'nfs-users2' groups. Set this to '*' to allow nfsserver user to proxy any group.
+ The 'nfsserver' user is allowed to proxy all members of the 'nfs-users1' and
+ 'nfs-users2' groups. Set this to '*' to allow the 'nfsserver' user to proxy any group.
+----
+
+----
hadoop.proxyuser.nfsserver.hosts
nfs-client-host1.com
- This is the host where the nfs gateway is running. Set this to '*' to allow requests from any hosts to be proxied.
+ This is the host where the nfs gateway is running. Set this to '*' to allow
+ requests from any hosts to be proxied.
----
+
The above are the only required configuration for the NFS gateway in non-secure mode. For Kerberized
hadoop clusters, the following configurations need to be added to hdfs-site.xml:
+
----
-
- dfs.nfsgateway.keytab.file
- /etc/hadoop/conf/nfsserver.keytab
-
-
- dfs.nfsgateway.kerberos.principal
- nfsserver/_HOST@YOUR-REALM.COM
-
+
+ dfs.nfsgateway.keytab.file
+ /etc/hadoop/conf/nfsserver.keytab
+
----
+
+----
+
+ dfs.nfsgateway.kerberos.principal
+ nfsserver/_HOST@YOUR-REALM.COM
+
+----
+
It's strongly recommended for the users to update a few configuration properties based on their use
cases. All the related configuration properties can be added or updated in hdfs-site.xml.
@@ -90,31 +103,61 @@ HDFS NFS Gateway
dfs.namenode.accesstime.precision
3600000
- The access time for HDFS file is precise upto this value.
+ The access time for HDFS file is precise up to this value.
The default value is 1 hour. Setting a value of 0 disables
access times for HDFS.
----
- * Users are expected to update the file dump directory. NFS client often
+ * Users are expected to update the file dump directory. NFS client often
reorders writes. Sequential writes can arrive at the NFS gateway at random
order. This directory is used to temporarily save out-of-order writes
- before writing to HDFS. For each file, the out-of-order writes are dumped after
+ before writing to HDFS. For each file, the out-of-order writes are dumped after
they are accumulated to exceed certain threshold (e.g., 1MB) in memory.
One needs to make sure the directory has enough
- space. For example, if the application uploads 10 files with each having
+ space. For example, if the application uploads 10 files with each having
100MB, it is recommended for this directory to have roughly 1GB space in case if a
- worst-case write reorder happens to every file. Only NFS gateway needs to restart after
+ worst-case write reorder happens to every file. Only NFS gateway needs to restart after
this property is updated.
----
- dfs.nfs3.dump.dir
+ dfs.nfs3.dump.dir
/tmp/.hdfs-nfs
----
+ * For optimal performance, it is recommended that rtmax be updated to
+ 1MB. However, note that this 1MB is a per-client allocation, not taken
+ from a shared memory pool, so a larger value may adversely affect
+ small reads by consuming a lot of memory. The maximum value of
+ this property is 1MB.
+
+----
+
+ dfs.nfs.rtmax
+ 1048576
+ This is the maximum size in bytes of a READ request
+ supported by the NFS gateway. If you change this, make sure you
+ also update the NFS mount's rsize (add rsize=<number of bytes> to the
+ mount directive).
+
+
+----
+
+----
+
+ dfs.nfs.wtmax
+ 65536
+ This is the maximum size in bytes of a WRITE request
+ supported by the NFS gateway. If you change this, make sure you
+ also update the NFS mount's wsize (add wsize=<number of bytes> to the
+ mount directive).
+
+
+----
+
* By default, the export can be mounted by any client. To better control the access,
users can update the following property. The value string contains machine name and
access privilege, separated by whitespace