HDFS-6080. Improve NFS gateway performance by making rtmax and wtmax configurable. Contributed by Abin Shahab
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1577319 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
aab466558a
commit
842aa2bc94
@ -206,6 +206,12 @@ public static WriteStableHow fromValue(int id) {
|
|||||||
public static final String FILE_DUMP_DIR_DEFAULT = "/tmp/.hdfs-nfs";
|
public static final String FILE_DUMP_DIR_DEFAULT = "/tmp/.hdfs-nfs";
|
||||||
public static final String ENABLE_FILE_DUMP_KEY = "dfs.nfs3.enableDump";
|
public static final String ENABLE_FILE_DUMP_KEY = "dfs.nfs3.enableDump";
|
||||||
public static final boolean ENABLE_FILE_DUMP_DEFAULT = true;
|
public static final boolean ENABLE_FILE_DUMP_DEFAULT = true;
|
||||||
|
public static final String MAX_READ_TRANSFER_SIZE_KEY = "dfs.nfs.rtmax";
|
||||||
|
public static final int MAX_READ_TRANSFER_SIZE_DEFAULT = 1024 * 1024;
|
||||||
|
public static final String MAX_WRITE_TRANSFER_SIZE_KEY = "dfs.nfs.wtmax";
|
||||||
|
public static final int MAX_WRITE_TRANSFER_SIZE_DEFAULT = 1024 * 1024;
|
||||||
|
public static final String MAX_READDIR_TRANSFER_SIZE_KEY = "dfs.nfs.dtmax";
|
||||||
|
public static final int MAX_READDIR_TRANSFER_SIZE_DEFAULT = 64 * 1024;
|
||||||
public static final String MAX_OPEN_FILES = "dfs.nfs3.max.open.files";
|
public static final String MAX_OPEN_FILES = "dfs.nfs3.max.open.files";
|
||||||
public static final int MAX_OPEN_FILES_DEFAULT = 256;
|
public static final int MAX_OPEN_FILES_DEFAULT = 256;
|
||||||
public static final String OUTPUT_STREAM_TIMEOUT = "dfs.nfs3.stream.timeout";
|
public static final String OUTPUT_STREAM_TIMEOUT = "dfs.nfs3.stream.timeout";
|
||||||
|
@ -142,9 +142,6 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface {
|
|||||||
(short) DEFAULT_UMASK);
|
(short) DEFAULT_UMASK);
|
||||||
|
|
||||||
static final Log LOG = LogFactory.getLog(RpcProgramNfs3.class);
|
static final Log LOG = LogFactory.getLog(RpcProgramNfs3.class);
|
||||||
private static final int MAX_READ_TRANSFER_SIZE = 64 * 1024;
|
|
||||||
private static final int MAX_WRITE_TRANSFER_SIZE = 64 * 1024;
|
|
||||||
private static final int MAX_READDIR_TRANSFER_SIZE = 64 * 1024;
|
|
||||||
|
|
||||||
private final Configuration config = new Configuration();
|
private final Configuration config = new Configuration();
|
||||||
private final WriteManager writeManager;
|
private final WriteManager writeManager;
|
||||||
@ -553,7 +550,11 @@ public READLINK3Response readlink(XDR xdr, SecurityHandler securityHandler,
|
|||||||
+ handle.getFileId());
|
+ handle.getFileId());
|
||||||
return new READLINK3Response(Nfs3Status.NFS3ERR_SERVERFAULT);
|
return new READLINK3Response(Nfs3Status.NFS3ERR_SERVERFAULT);
|
||||||
}
|
}
|
||||||
if (MAX_READ_TRANSFER_SIZE < target.getBytes().length) {
|
int rtmax = config.getInt(Nfs3Constant.MAX_READ_TRANSFER_SIZE_KEY,
|
||||||
|
Nfs3Constant.MAX_READ_TRANSFER_SIZE_DEFAULT);
|
||||||
|
if (rtmax < target.getBytes().length) {
|
||||||
|
LOG.error("Link size: " + target.getBytes().length
|
||||||
|
+ " is larger than max transfer size: " + rtmax);
|
||||||
return new READLINK3Response(Nfs3Status.NFS3ERR_IO, postOpAttr,
|
return new READLINK3Response(Nfs3Status.NFS3ERR_IO, postOpAttr,
|
||||||
new byte[0]);
|
new byte[0]);
|
||||||
}
|
}
|
||||||
@ -649,7 +650,9 @@ public READ3Response read(XDR xdr, SecurityHandler securityHandler,
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
int buffSize = Math.min(MAX_READ_TRANSFER_SIZE, count);
|
int rtmax = config.getInt(Nfs3Constant.MAX_READ_TRANSFER_SIZE_KEY,
|
||||||
|
Nfs3Constant.MAX_READ_TRANSFER_SIZE_DEFAULT);
|
||||||
|
int buffSize = Math.min(rtmax, count);
|
||||||
byte[] readbuffer = new byte[buffSize];
|
byte[] readbuffer = new byte[buffSize];
|
||||||
|
|
||||||
int readCount = 0;
|
int readCount = 0;
|
||||||
@ -1714,9 +1717,12 @@ public FSINFO3Response fsinfo(XDR xdr, SecurityHandler securityHandler,
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
int rtmax = MAX_READ_TRANSFER_SIZE;
|
int rtmax = config.getInt(Nfs3Constant.MAX_READ_TRANSFER_SIZE_KEY,
|
||||||
int wtmax = MAX_WRITE_TRANSFER_SIZE;
|
Nfs3Constant.MAX_READ_TRANSFER_SIZE_DEFAULT);
|
||||||
int dtperf = MAX_READDIR_TRANSFER_SIZE;
|
int wtmax = config.getInt(Nfs3Constant.MAX_WRITE_TRANSFER_SIZE_KEY,
|
||||||
|
Nfs3Constant.MAX_WRITE_TRANSFER_SIZE_DEFAULT);
|
||||||
|
int dtperf = config.getInt(Nfs3Constant.MAX_READDIR_TRANSFER_SIZE_KEY,
|
||||||
|
Nfs3Constant.MAX_READDIR_TRANSFER_SIZE_DEFAULT);
|
||||||
|
|
||||||
Nfs3FileAttributes attrs = Nfs3Utils.getFileAttr(dfsClient,
|
Nfs3FileAttributes attrs = Nfs3Utils.getFileAttr(dfsClient,
|
||||||
Nfs3Utils.getFileIdPath(handle), iug);
|
Nfs3Utils.getFileIdPath(handle), iug);
|
||||||
|
@ -400,6 +400,9 @@ Release 2.4.0 - UNRELEASED
|
|||||||
|
|
||||||
HDFS-6072. Clean up dead code of FSImage. (wheat9)
|
HDFS-6072. Clean up dead code of FSImage. (wheat9)
|
||||||
|
|
||||||
|
HDFS-6080. Improve NFS gateway performance by making rtmax and wtmax
|
||||||
|
configurable. (Abin Shahab via brandonli)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery
|
HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery
|
||||||
|
@ -48,35 +48,48 @@ HDFS NFS Gateway
|
|||||||
|
|
||||||
The user running the NFS-gateway must be able to proxy all the users using the NFS mounts.
|
The user running the NFS-gateway must be able to proxy all the users using the NFS mounts.
|
||||||
For instance, if user 'nfsserver' is running the gateway, and users belonging to the groups 'nfs-users1'
|
For instance, if user 'nfsserver' is running the gateway, and users belonging to the groups 'nfs-users1'
|
||||||
and 'nfs-users2' use the NFS mounts, then in core-site.xml of the namenode, the following must be set:
|
and 'nfs-users2' use the NFS mounts, then in core-site.xml of the namenode, the following must be set
|
||||||
|
(NOTE: replace 'nfsserver' with the user name starting the gateway in your cluster):
|
||||||
|
|
||||||
----
|
----
|
||||||
<property>
|
<property>
|
||||||
<name>hadoop.proxyuser.nfsserver.groups</name>
|
<name>hadoop.proxyuser.nfsserver.groups</name>
|
||||||
<value>nfs-users1,nfs-users2</value>
|
<value>nfs-users1,nfs-users2</value>
|
||||||
<description>
|
<description>
|
||||||
The 'nfsserver' user is allowed to proxy all members of the 'nfs-users1' and 'nfs-users2' groups. Set this to '*' to allow nfsserver user to proxy any group.
|
The 'nfsserver' user is allowed to proxy all members of the 'nfs-users1' and
|
||||||
|
'nfs-users2' groups. Set this to '*' to allow nfsserver user to proxy any group.
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
----
|
||||||
|
|
||||||
|
----
|
||||||
<property>
|
<property>
|
||||||
<name>hadoop.proxyuser.nfsserver.hosts</name>
|
<name>hadoop.proxyuser.nfsserver.hosts</name>
|
||||||
<value>nfs-client-host1.com</value>
|
<value>nfs-client-host1.com</value>
|
||||||
<description>
|
<description>
|
||||||
This is the host where the nfs gateway is running. Set this to '*' to allow requests from any host to be proxied.
|
This is the host where the nfs gateway is running. Set this to '*' to allow
|
||||||
|
requests from any host to be proxied.
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
----
|
----
|
||||||
|
|
||||||
The above are the only required configurations for the NFS gateway in non-secure mode. For Kerberized
|
The above are the only required configurations for the NFS gateway in non-secure mode. For Kerberized
|
||||||
hadoop clusters, the following configurations need to be added to hdfs-site.xml:
|
hadoop clusters, the following configurations need to be added to hdfs-site.xml:
|
||||||
|
|
||||||
----
|
----
|
||||||
<property>
|
<property>
|
||||||
<name>dfs.nfsgateway.keytab.file</name>
|
<name>dfs.nfsgateway.keytab.file</name>
|
||||||
<value>/etc/hadoop/conf/nfsserver.keytab</value> <!-- path to the nfs gateway keytab -->
|
<value>/etc/hadoop/conf/nfsserver.keytab</value> <!-- path to the nfs gateway keytab -->
|
||||||
</property>
|
</property>
|
||||||
<property>
|
|
||||||
<name>dfs.nfsgateway.kerberos.principal</name>
|
|
||||||
<value>nfsserver/_HOST@YOUR-REALM.COM</value>
|
|
||||||
</property>
|
|
||||||
----
|
----
|
||||||
|
|
||||||
|
----
|
||||||
|
<property>
|
||||||
|
<name>dfs.nfsgateway.kerberos.principal</name>
|
||||||
|
<value>nfsserver/_HOST@YOUR-REALM.COM</value>
|
||||||
|
</property>
|
||||||
|
----
|
||||||
|
|
||||||
It's strongly recommended for the users to update a few configuration properties based on their use
|
It's strongly recommended for the users to update a few configuration properties based on their use
|
||||||
cases. All the related configuration properties can be added or updated in hdfs-site.xml.
|
cases. All the related configuration properties can be added or updated in hdfs-site.xml.
|
||||||
|
|
||||||
@ -90,31 +103,61 @@ HDFS NFS Gateway
|
|||||||
<property>
|
<property>
|
||||||
<name>dfs.namenode.accesstime.precision</name>
|
<name>dfs.namenode.accesstime.precision</name>
|
||||||
<value>3600000</value>
|
<value>3600000</value>
|
||||||
<description>The access time for an HDFS file is precise up to this value.
|
<description>The access time for an HDFS file is precise up to this value.
|
||||||
The default value is 1 hour. Setting a value of 0 disables
|
The default value is 1 hour. Setting a value of 0 disables
|
||||||
access times for HDFS.
|
access times for HDFS.
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
----
|
----
|
||||||
|
|
||||||
* Users are expected to update the file dump directory. NFS clients often
|
* Users are expected to update the file dump directory. NFS clients often
|
||||||
reorder writes. Sequential writes can arrive at the NFS gateway in random
|
reorder writes. Sequential writes can arrive at the NFS gateway in random
|
||||||
order. This directory is used to temporarily save out-of-order writes
|
order. This directory is used to temporarily save out-of-order writes
|
||||||
before writing to HDFS. For each file, the out-of-order writes are dumped after
|
before writing to HDFS. For each file, the out-of-order writes are dumped after
|
||||||
they are accumulated to exceed certain threshold (e.g., 1MB) in memory.
|
they are accumulated to exceed certain threshold (e.g., 1MB) in memory.
|
||||||
One needs to make sure the directory has enough
|
One needs to make sure the directory has enough
|
||||||
space. For example, if the application uploads 10 files with each having
|
space. For example, if the application uploads 10 files with each having
|
||||||
100MB, it is recommended for this directory to have roughly 1GB space in case a
|
100MB, it is recommended for this directory to have roughly 1GB space in case a
|
||||||
worst-case write reorder happens to every file. Only NFS gateway needs to restart after
|
worst-case write reorder happens to every file. Only NFS gateway needs to restart after
|
||||||
this property is updated.
|
this property is updated.
|
||||||
|
|
||||||
----
|
----
|
||||||
<property>
|
<property>
|
||||||
<name>dfs.nfs3.dump.dir</name>
|
<name>dfs.nfs3.dump.dir</name>
|
||||||
<value>/tmp/.hdfs-nfs</value>
|
<value>/tmp/.hdfs-nfs</value>
|
||||||
</property>
|
</property>
|
||||||
----
|
----
|
||||||
|
|
||||||
|
* For optimal performance, it is recommended that rtmax be updated to
|
||||||
|
1MB. However, note that this 1MB is a per client allocation, and not
|
||||||
|
from a shared memory pool, and therefore a larger value may adversely
|
||||||
|
affect small reads, consuming a lot of memory. The maximum value of
|
||||||
|
this property is 1MB.
|
||||||
|
|
||||||
|
----
|
||||||
|
<property>
|
||||||
|
<name>dfs.nfs.rtmax</name>
|
||||||
|
<value>1048576</value>
|
||||||
|
<description>This is the maximum size in bytes of a READ request
|
||||||
|
supported by the NFS gateway. If you change this, make sure you
|
||||||
|
also update the nfs mount's rsize (add rsize= # of bytes to the
|
||||||
|
mount directive).
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
----
|
||||||
|
|
||||||
|
----
|
||||||
|
<property>
|
||||||
|
<name>dfs.nfs.wtmax</name>
|
||||||
|
<value>65536</value>
|
||||||
|
<description>This is the maximum size in bytes of a WRITE request
|
||||||
|
supported by the NFS gateway. If you change this, make sure you
|
||||||
|
also update the nfs mount's wsize (add wsize= # of bytes to the
|
||||||
|
mount directive).
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
----
|
||||||
|
|
||||||
* By default, the export can be mounted by any client. To better control the access,
|
* By default, the export can be mounted by any client. To better control the access,
|
||||||
users can update the following property. The value string contains machine name and
|
users can update the following property. The value string contains machine name and
|
||||||
access privilege, separated by whitespace
|
access privilege, separated by whitespace
|
||||||
|
Loading…
Reference in New Issue
Block a user