HDFS-6080. Improve NFS gateway performance by making rtmax and wtmax configurable. Contributed by Abin Shahab

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1577319 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Brandon Li 2014-03-13 21:03:59 +00:00
parent aab466558a
commit 842aa2bc94
4 changed files with 83 additions and 25 deletions

View File

@ -206,6 +206,12 @@ public static WriteStableHow fromValue(int id) {
public static final String FILE_DUMP_DIR_DEFAULT = "/tmp/.hdfs-nfs"; public static final String FILE_DUMP_DIR_DEFAULT = "/tmp/.hdfs-nfs";
public static final String ENABLE_FILE_DUMP_KEY = "dfs.nfs3.enableDump"; public static final String ENABLE_FILE_DUMP_KEY = "dfs.nfs3.enableDump";
public static final boolean ENABLE_FILE_DUMP_DEFAULT = true; public static final boolean ENABLE_FILE_DUMP_DEFAULT = true;
public static final String MAX_READ_TRANSFER_SIZE_KEY = "dfs.nfs.rtmax";
public static final int MAX_READ_TRANSFER_SIZE_DEFAULT = 1024 * 1024;
public static final String MAX_WRITE_TRANSFER_SIZE_KEY = "dfs.nfs.wtmax";
public static final int MAX_WRITE_TRANSFER_SIZE_DEFAULT = 1024 * 1024;
public static final String MAX_READDIR_TRANSFER_SIZE_KEY = "dfs.nfs.dtmax";
public static final int MAX_READDIR_TRANSFER_SIZE_DEFAULT = 64 * 1024;
public static final String MAX_OPEN_FILES = "dfs.nfs3.max.open.files"; public static final String MAX_OPEN_FILES = "dfs.nfs3.max.open.files";
public static final int MAX_OPEN_FILES_DEFAULT = 256; public static final int MAX_OPEN_FILES_DEFAULT = 256;
public static final String OUTPUT_STREAM_TIMEOUT = "dfs.nfs3.stream.timeout"; public static final String OUTPUT_STREAM_TIMEOUT = "dfs.nfs3.stream.timeout";

View File

@ -142,9 +142,6 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface {
(short) DEFAULT_UMASK); (short) DEFAULT_UMASK);
static final Log LOG = LogFactory.getLog(RpcProgramNfs3.class); static final Log LOG = LogFactory.getLog(RpcProgramNfs3.class);
private static final int MAX_READ_TRANSFER_SIZE = 64 * 1024;
private static final int MAX_WRITE_TRANSFER_SIZE = 64 * 1024;
private static final int MAX_READDIR_TRANSFER_SIZE = 64 * 1024;
private final Configuration config = new Configuration(); private final Configuration config = new Configuration();
private final WriteManager writeManager; private final WriteManager writeManager;
@ -553,7 +550,11 @@ public READLINK3Response readlink(XDR xdr, SecurityHandler securityHandler,
+ handle.getFileId()); + handle.getFileId());
return new READLINK3Response(Nfs3Status.NFS3ERR_SERVERFAULT); return new READLINK3Response(Nfs3Status.NFS3ERR_SERVERFAULT);
} }
if (MAX_READ_TRANSFER_SIZE < target.getBytes().length) { int rtmax = config.getInt(Nfs3Constant.MAX_READ_TRANSFER_SIZE_KEY,
Nfs3Constant.MAX_READ_TRANSFER_SIZE_DEFAULT);
if (rtmax < target.getBytes().length) {
LOG.error("Link size: " + target.getBytes().length
+ " is larger than max transfer size: " + rtmax);
return new READLINK3Response(Nfs3Status.NFS3ERR_IO, postOpAttr, return new READLINK3Response(Nfs3Status.NFS3ERR_IO, postOpAttr,
new byte[0]); new byte[0]);
} }
@ -649,7 +650,9 @@ public READ3Response read(XDR xdr, SecurityHandler securityHandler,
} }
try { try {
int buffSize = Math.min(MAX_READ_TRANSFER_SIZE, count); int rtmax = config.getInt(Nfs3Constant.MAX_READ_TRANSFER_SIZE_KEY,
Nfs3Constant.MAX_READ_TRANSFER_SIZE_DEFAULT);
int buffSize = Math.min(rtmax, count);
byte[] readbuffer = new byte[buffSize]; byte[] readbuffer = new byte[buffSize];
int readCount = 0; int readCount = 0;
@ -1714,9 +1717,12 @@ public FSINFO3Response fsinfo(XDR xdr, SecurityHandler securityHandler,
} }
try { try {
int rtmax = MAX_READ_TRANSFER_SIZE; int rtmax = config.getInt(Nfs3Constant.MAX_READ_TRANSFER_SIZE_KEY,
int wtmax = MAX_WRITE_TRANSFER_SIZE; Nfs3Constant.MAX_READ_TRANSFER_SIZE_DEFAULT);
int dtperf = MAX_READDIR_TRANSFER_SIZE; int wtmax = config.getInt(Nfs3Constant.MAX_WRITE_TRANSFER_SIZE_KEY,
Nfs3Constant.MAX_WRITE_TRANSFER_SIZE_DEFAULT);
int dtperf = config.getInt(Nfs3Constant.MAX_READDIR_TRANSFER_SIZE_KEY,
Nfs3Constant.MAX_READDIR_TRANSFER_SIZE_DEFAULT);
Nfs3FileAttributes attrs = Nfs3Utils.getFileAttr(dfsClient, Nfs3FileAttributes attrs = Nfs3Utils.getFileAttr(dfsClient,
Nfs3Utils.getFileIdPath(handle), iug); Nfs3Utils.getFileIdPath(handle), iug);

View File

@ -400,6 +400,9 @@ Release 2.4.0 - UNRELEASED
HDFS-6072. Clean up dead code of FSImage. (wheat9) HDFS-6072. Clean up dead code of FSImage. (wheat9)
HDFS-6080. Improve NFS gateway performance by making rtmax and wtmax
configurable. (Abin Shahab via brandonli)
OPTIMIZATIONS OPTIMIZATIONS
HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery

View File

@ -48,35 +48,48 @@ HDFS NFS Gateway
The user running the NFS-gateway must be able to proxy all the users using the NFS mounts. The user running the NFS-gateway must be able to proxy all the users using the NFS mounts.
For instance, if user 'nfsserver' is running the gateway, and users belonging to the groups 'nfs-users1' For instance, if user 'nfsserver' is running the gateway, and users belonging to the groups 'nfs-users1'
and 'nfs-users2' use the NFS mounts, then in core-site.xml of the namenode, the following must be set: and 'nfs-users2' use the NFS mounts, then in core-site.xml of the namenode, the following must be set
(NOTE: replace 'nfsserver' with the user name starting the gateway in your cluster):
---- ----
<property> <property>
<name>hadoop.proxyuser.nfsserver.groups</name> <name>hadoop.proxyuser.nfsserver.groups</name>
<value>nfs-users1,nfs-users2</value> <value>nfs-users1,nfs-users2</value>
<description> <description>
The 'nfsserver' user is allowed to proxy all members of the 'nfs-users1' and 'nfs-users2' groups. Set this to '*' to allow nfsserver user to proxy any group. The 'nfsserver' user is allowed to proxy all members of the 'nfs-users1' and
'nfs-users2' groups. Set this to '*' to allow nfsserver user to proxy any group.
</description> </description>
</property> </property>
----
----
<property> <property>
<name>hadoop.proxyuser.nfsserver.hosts</name> <name>hadoop.proxyuser.nfsserver.hosts</name>
<value>nfs-client-host1.com</value> <value>nfs-client-host1.com</value>
<description> <description>
This is the host where the nfs gateway is running. Set this to '*' to allow requests from any hosts to be proxied. This is the host where the nfs gateway is running. Set this to '*' to allow
requests from any hosts to be proxied.
</description> </description>
</property> </property>
---- ----
The above are the only required configurations for the NFS gateway in non-secure mode. For Kerberized The above are the only required configurations for the NFS gateway in non-secure mode. For Kerberized
hadoop clusters, the following configurations need to be added to hdfs-site.xml: hadoop clusters, the following configurations need to be added to hdfs-site.xml:
---- ----
<property> <property>
<name>dfs.nfsgateway.keytab.file</name> <name>dfs.nfsgateway.keytab.file</name>
<value>/etc/hadoop/conf/nfsserver.keytab</value> <!-- path to the nfs gateway keytab --> <value>/etc/hadoop/conf/nfsserver.keytab</value> <!-- path to the nfs gateway keytab -->
</property> </property>
<property>
<name>dfs.nfsgateway.kerberos.principal</name>
<value>nfsserver/_HOST@YOUR-REALM.COM</value>
</property>
---- ----
----
<property>
<name>dfs.nfsgateway.kerberos.principal</name>
<value>nfsserver/_HOST@YOUR-REALM.COM</value>
</property>
----
It's strongly recommended for the users to update a few configuration properties based on their use It's strongly recommended for the users to update a few configuration properties based on their use
cases. All the related configuration properties can be added or updated in hdfs-site.xml. cases. All the related configuration properties can be added or updated in hdfs-site.xml.
@ -90,31 +103,61 @@ HDFS NFS Gateway
<property> <property>
<name>dfs.namenode.accesstime.precision</name> <name>dfs.namenode.accesstime.precision</name>
<value>3600000</value> <value>3600000</value>
<description>The access time for an HDFS file is precise up to this value. <description>The access time for an HDFS file is precise up to this value.
The default value is 1 hour. Setting a value of 0 disables The default value is 1 hour. Setting a value of 0 disables
access times for HDFS. access times for HDFS.
</description> </description>
</property> </property>
---- ----
* Users are expected to update the file dump directory. NFS clients often * Users are expected to update the file dump directory. NFS clients often
reorder writes. Sequential writes can arrive at the NFS gateway in random reorder writes. Sequential writes can arrive at the NFS gateway in random
order. This directory is used to temporarily save out-of-order writes order. This directory is used to temporarily save out-of-order writes
before writing to HDFS. For each file, the out-of-order writes are dumped after before writing to HDFS. For each file, the out-of-order writes are dumped after
they accumulate to exceed a certain threshold (e.g., 1MB) in memory. they accumulate to exceed a certain threshold (e.g., 1MB) in memory.
One needs to make sure the directory has enough One needs to make sure the directory has enough
space. For example, if the application uploads 10 files with each having space. For example, if the application uploads 10 files with each having
100MB, it is recommended for this directory to have roughly 1GB space in case a 100MB, it is recommended for this directory to have roughly 1GB space in case a
worst-case write reorder happens to every file. Only the NFS gateway needs to be restarted after worst-case write reorder happens to every file. Only the NFS gateway needs to be restarted after
this property is updated. this property is updated.
---- ----
<property> <property>
<name>dfs.nfs3.dump.dir</name> <name>dfs.nfs3.dump.dir</name>
<value>/tmp/.hdfs-nfs</value> <value>/tmp/.hdfs-nfs</value>
</property> </property>
---- ----
* For optimal performance, it is recommended that rtmax be set to
1MB. However, note that this 1MB is a per-client allocation, not
taken from a shared memory pool, so a larger value may adversely
affect small reads by consuming a lot of memory. The maximum value of
this property is 1MB.
----
<property>
<name>dfs.nfs.rtmax</name>
<value>1048576</value>
<description>This is the maximum size in bytes of a READ request
supported by the NFS gateway. If you change this, make sure you
also update the NFS mount's rsize (add rsize=# of bytes to the
mount directive).
</description>
</property>
----
----
<property>
<name>dfs.nfs.wtmax</name>
<value>65536</value>
<description>This is the maximum size in bytes of a WRITE request
supported by the NFS gateway. If you change this, make sure you
also update the NFS mount's wsize (add wsize=# of bytes to the
mount directive).
</description>
</property>
----
* By default, the export can be mounted by any client. To better control the access, * By default, the export can be mounted by any client. To better control the access,
users can update the following property. The value string contains machine name and users can update the following property. The value string contains machine name and
access privilege, separated by whitespace access privilege, separated by whitespace