<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
-->

<!-- Do not modify this file directly.  Instead, copy entries that you -->
<!-- wish to modify from this file into hdfs-site.xml and change them -->
<!-- there.  If hdfs-site.xml does not already exist, create it.      -->

<configuration>

<property>
  <name>hadoop.hdfs.configuration.version</name>
  <value>1</value>
  <description>version of this configuration file</description>
</property>

<property>
  <name>dfs.namenode.logging.level</name>
  <value>info</value>
  <description>
    The logging level for dfs namenode. Other values are "dir" (trace
    namespace mutations), "block" (trace block under/over replications
    and block creations/deletions), or "all".
  </description>
</property>

<property>
  <name>dfs.namenode.rpc-address</name>
  <value></value>
  <description>
    RPC address that handles all client requests. In the case of HA/Federation
    where multiple namenodes exist, the name service id is added to the name,
    e.g. dfs.namenode.rpc-address.ns1 or
    dfs.namenode.rpc-address.EXAMPLENAMESERVICE.
    The value of this property will take the form of nn-host1:rpc-port.
  </description>
</property>
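
<!-- Illustrative example (not part of the defaults): for an HA pair in a
     hypothetical nameservice "ns1" with namenode IDs "nn1" and "nn2",
     hdfs-site.xml would carry one suffixed rpc-address per namenode. The
     host names and port below are placeholders.

<property>
  <name>dfs.namenode.rpc-address.ns1.nn1</name>
  <value>nn-host1.example.com:8020</value>
</property>
<property>
  <name>dfs.namenode.rpc-address.ns1.nn2</name>
  <value>nn-host2.example.com:8020</value>
</property>
-->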

<property>
  <name>dfs.namenode.rpc-bind-host</name>
  <value></value>
  <description>
    The actual address the server will bind to. If this optional address is
    set, the RPC server will bind to this address and the port specified in
    dfs.namenode.rpc-address for the RPC server. It can also be specified
    per name node or name service for HA/Federation. This is most useful for
    making the name node listen on all interfaces by setting it to 0.0.0.0.
  </description>
</property>

<property>
  <name>dfs.namenode.servicerpc-address</name>
  <value></value>
  <description>
    RPC address for HDFS Services communication. BackupNode, Datanodes and
    all other services should connect to this address if it is configured.
    In the case of HA/Federation where multiple namenodes exist, the name
    service id is added to the name, e.g. dfs.namenode.servicerpc-address.ns1
    or dfs.namenode.servicerpc-address.EXAMPLENAMESERVICE.
    The value of this property will take the form of nn-host1:rpc-port.
    If the value of this property is unset the value of
    dfs.namenode.rpc-address will be used as the default.
  </description>
</property>

<property>
  <name>dfs.namenode.servicerpc-bind-host</name>
  <value></value>
  <description>
    The actual address the server will bind to. If this optional address is
    set, the service RPC server will bind to this address and the port
    specified in dfs.namenode.servicerpc-address. It can also be specified
    per name node or name service for HA/Federation. This is most useful for
    making the name node listen on all interfaces by setting it to 0.0.0.0.
  </description>
</property>

<property>
  <name>dfs.namenode.secondary.http-address</name>
  <value>0.0.0.0:50090</value>
  <description>
    The secondary namenode http server address and port.
  </description>
</property>

<property>
  <name>dfs.datanode.address</name>
  <value>0.0.0.0:50010</value>
  <description>
    The datanode server address and port for data transfer.
  </description>
</property>

<property>
  <name>dfs.datanode.http.address</name>
  <value>0.0.0.0:50075</value>
  <description>
    The datanode http server address and port.
  </description>
</property>

<property>
  <name>dfs.datanode.ipc.address</name>
  <value>0.0.0.0:50020</value>
  <description>
    The datanode ipc server address and port.
  </description>
</property>

<property>
  <name>dfs.datanode.handler.count</name>
  <value>10</value>
  <description>The number of server threads for the datanode.</description>
</property>

<property>
  <name>dfs.namenode.http-address</name>
  <value>0.0.0.0:50070</value>
  <description>
    The address and the base port on which the dfs namenode web ui will listen.
  </description>
</property>

<property>
  <name>dfs.https.enable</name>
  <value>false</value>
  <description>
    Deprecated. Use "dfs.http.policy" instead.
  </description>
</property>

<property>
  <name>dfs.http.policy</name>
  <value>HTTP_ONLY</value>
  <description>
    Decide if HTTPS(SSL) is supported on HDFS. This configures the HTTP
    endpoint for HDFS daemons. The following values are supported:
    - HTTP_ONLY : Service is provided only on http
    - HTTPS_ONLY : Service is provided only on https
    - HTTP_AND_HTTPS : Service is provided both on http and https
  </description>
</property>
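
<!-- Illustrative example (not a default): to serve the HDFS daemon web
     endpoints over TLS only, an hdfs-site.xml override could select the
     HTTPS_ONLY policy; certificates are then supplied via the
     ssl-server.xml resource configured below.

<property>
  <name>dfs.http.policy</name>
  <value>HTTPS_ONLY</value>
</property>
-->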

<property>
  <name>dfs.client.https.need-auth</name>
  <value>false</value>
  <description>Whether SSL client certificate authentication is required
  </description>
</property>

<property>
  <name>dfs.https.server.keystore.resource</name>
  <value>ssl-server.xml</value>
  <description>Resource file from which ssl server keystore
    information will be extracted
  </description>
</property>

<property>
  <name>dfs.client.https.keystore.resource</name>
  <value>ssl-client.xml</value>
  <description>Resource file from which ssl client keystore
    information will be extracted
  </description>
</property>

<property>
  <name>dfs.datanode.https.address</name>
  <value>0.0.0.0:50475</value>
  <description>The datanode secure http server address and port.</description>
</property>

<property>
  <name>dfs.namenode.https-address</name>
  <value>0.0.0.0:50470</value>
  <description>The namenode secure http server address and port.</description>
</property>

<property>
  <name>dfs.datanode.dns.interface</name>
  <value>default</value>
  <description>The name of the Network Interface from which a data node should
    report its IP address.
  </description>
</property>

<property>
  <name>dfs.datanode.dns.nameserver</name>
  <value>default</value>
  <description>The host name or IP address of the name server (DNS)
    which a DataNode should use to determine the host name used by the
    NameNode for communication and display purposes.
  </description>
</property>

<property>
  <name>dfs.namenode.backup.address</name>
  <value>0.0.0.0:50100</value>
  <description>
    The backup node server address and port.
    If the port is 0 then the server will start on a free port.
  </description>
</property>

<property>
  <name>dfs.namenode.backup.http-address</name>
  <value>0.0.0.0:50105</value>
  <description>
    The backup node http server address and port.
    If the port is 0 then the server will start on a free port.
  </description>
</property>

<property>
  <name>dfs.namenode.replication.considerLoad</name>
  <value>true</value>
  <description>Decide if chooseTarget considers the target's load or not
  </description>
</property>

<property>
  <name>dfs.default.chunk.view.size</name>
  <value>32768</value>
  <description>The number of bytes to view for a file on the browser.
  </description>
</property>

<property>
  <name>dfs.datanode.du.reserved</name>
  <value>0</value>
  <description>Reserved space in bytes per volume. Always leave this much space free for non dfs use.
  </description>
</property>

<property>
  <name>dfs.namenode.name.dir</name>
  <value>file://${hadoop.tmp.dir}/dfs/name</value>
  <description>Determines where on the local filesystem the DFS name node
    should store the name table (fsimage). If this is a comma-delimited list
    of directories then the name table is replicated in all of the
    directories, for redundancy.</description>
</property>
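
<!-- Illustrative example (not a default): storing the name table redundantly
     on two local disks plus an NFS mount, as a comma-delimited list. The
     paths below are hypothetical.

<property>
  <name>dfs.namenode.name.dir</name>
  <value>file:///data/1/dfs/nn,file:///data/2/dfs/nn,file:///mnt/nfs/dfs/nn</value>
</property>
-->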

<property>
  <name>dfs.namenode.name.dir.restore</name>
  <value>false</value>
  <description>Set to true to enable NameNode to attempt recovering a
    previously failed dfs.namenode.name.dir. When enabled, a recovery of any
    failed directory is attempted during checkpoint.</description>
</property>

<property>
  <name>dfs.namenode.fs-limits.max-component-length</name>
  <value>0</value>
  <description>Defines the maximum number of characters in each component
    of a path. A value of 0 will disable the check.</description>
</property>

<property>
  <name>dfs.namenode.fs-limits.max-directory-items</name>
  <value>0</value>
  <description>Defines the maximum number of items that a directory may
    contain. A value of 0 will disable the check.</description>
</property>

<property>
  <name>dfs.namenode.fs-limits.min-block-size</name>
  <value>1048576</value>
  <description>Minimum block size in bytes, enforced by the Namenode at create
    time. This prevents the accidental creation of files with tiny block
    sizes (and thus many blocks), which can degrade
    performance.</description>
</property>

<property>
  <name>dfs.namenode.fs-limits.max-blocks-per-file</name>
  <value>1048576</value>
  <description>Maximum number of blocks per file, enforced by the Namenode on
    write. This prevents the creation of extremely large files which can
    degrade performance.</description>
</property>

<property>
  <name>dfs.namenode.edits.dir</name>
  <value>${dfs.namenode.name.dir}</value>
  <description>Determines where on the local filesystem the DFS name node
    should store the transaction (edits) file. If this is a comma-delimited list
    of directories then the transaction file is replicated in all of the
    directories, for redundancy. Default value is same as dfs.namenode.name.dir
  </description>
</property>

<property>
  <name>dfs.namenode.shared.edits.dir</name>
  <value></value>
  <description>A directory on shared storage between the multiple namenodes
    in an HA cluster. This directory will be written by the active and read
    by the standby in order to keep the namespaces synchronized. This directory
    does not need to be listed in dfs.namenode.edits.dir above. It should be
    left empty in a non-HA cluster.
  </description>
</property>

<property>
  <name>dfs.namenode.edits.journal-plugin.qjournal</name>
  <value>org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager</value>
</property>
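
<!-- Illustrative example (not a default): with the qjournal plugin above,
     dfs.namenode.shared.edits.dir takes a qjournal URI listing the
     JournalNodes and a journal identifier. The hosts and the "mycluster"
     id below are hypothetical.

<property>
  <name>dfs.namenode.shared.edits.dir</name>
  <value>qjournal://jn1.example.com:8485;jn2.example.com:8485;jn3.example.com:8485/mycluster</value>
</property>
-->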

<property>
  <name>dfs.permissions.enabled</name>
  <value>true</value>
  <description>
    If "true", enable permission checking in HDFS.
    If "false", permission checking is turned off,
    but all other behavior is unchanged.
    Switching from one parameter value to the other does not change the mode,
    owner or group of files or directories.
  </description>
</property>

<property>
  <name>dfs.permissions.superusergroup</name>
  <value>supergroup</value>
  <description>The name of the group of super-users.</description>
</property>

<!--
<property>
  <name>dfs.cluster.administrators</name>
  <value>ACL for the admins</value>
  <description>This configuration is used to control who can access the
    default servlets in the namenode, etc.
  </description>
</property>
-->

<property>
  <name>dfs.block.access.token.enable</name>
  <value>false</value>
  <description>
    If "true", access tokens are used as capabilities for accessing datanodes.
    If "false", no access tokens are checked on accessing datanodes.
  </description>
</property>

<property>
  <name>dfs.block.access.key.update.interval</name>
  <value>600</value>
  <description>
    Interval in minutes at which namenode updates its access keys.
  </description>
</property>

<property>
  <name>dfs.block.access.token.lifetime</name>
  <value>600</value>
  <description>The lifetime of access tokens in minutes.</description>
</property>

<property>
  <name>dfs.datanode.data.dir</name>
  <value>file://${hadoop.tmp.dir}/dfs/data</value>
  <description>Determines where on the local filesystem a DFS data node
    should store its blocks. If this is a comma-delimited
    list of directories, then data will be stored in all named
    directories, typically on different devices.
    Directories that do not exist are ignored.
  </description>
</property>

<property>
  <name>dfs.datanode.data.dir.perm</name>
  <value>700</value>
  <description>Permissions for the directories on the local filesystem where
    the DFS data node stores its blocks. The permissions can either be octal
    or symbolic.</description>
</property>

<property>
  <name>dfs.replication</name>
  <value>3</value>
  <description>Default block replication.
    The actual number of replications can be specified when the file is created.
    The default is used if replication is not specified at create time.
  </description>
</property>

<property>
  <name>dfs.replication.max</name>
  <value>512</value>
  <description>Maximal block replication.
  </description>
</property>

<property>
  <name>dfs.namenode.replication.min</name>
  <value>1</value>
  <description>Minimal block replication.
  </description>
</property>

<property>
  <name>dfs.blocksize</name>
  <value>134217728</value>
  <description>
    The default block size for new files, in bytes.
    You can use the following suffixes (case insensitive):
    k (kilo), m (mega), g (giga), t (tera), p (peta), e (exa) to specify the
    size (such as 128k, 512m, 1g, etc.), or provide the complete size in
    bytes (such as 134217728 for 128 MB).
  </description>
</property>
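
<!-- Illustrative example (not a default): the same 128 MB block size
     expressed with a size suffix instead of a byte count.

<property>
  <name>dfs.blocksize</name>
  <value>128m</value>
</property>
-->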

<property>
  <name>dfs.client.block.write.retries</name>
  <value>3</value>
  <description>The number of retries for writing blocks to the data nodes,
    before we signal failure to the application.
  </description>
</property>

<property>
  <name>dfs.client.block.write.replace-datanode-on-failure.enable</name>
  <value>true</value>
  <description>
    If there is a datanode/network failure in the write pipeline,
    DFSClient will try to remove the failed datanode from the pipeline
    and then continue writing with the remaining datanodes. As a result,
    the number of datanodes in the pipeline is decreased. This feature
    adds new datanodes back to the pipeline.

    This is a site-wide property to enable/disable the feature.

    When the cluster size is extremely small, e.g. 3 nodes or less, cluster
    administrators may want to set the policy to NEVER in the default
    configuration file or disable this feature. Otherwise, users may
    experience an unusually high rate of pipeline failures since it is
    impossible to find new datanodes for replacement.

    See also dfs.client.block.write.replace-datanode-on-failure.policy
  </description>
</property>

<property>
  <name>dfs.client.block.write.replace-datanode-on-failure.policy</name>
  <value>DEFAULT</value>
  <description>
    This property is used only if the value of
    dfs.client.block.write.replace-datanode-on-failure.enable is true.

    ALWAYS: always add a new datanode when an existing datanode is removed.

    NEVER: never add a new datanode.

    DEFAULT:
      Let r be the replication number.
      Let n be the number of existing datanodes.
      Add a new datanode only if r is greater than or equal to 3 and either
      (1) floor(r/2) is greater than or equal to n; or
      (2) r is greater than n and the block is hflushed/appended.
  </description>
</property>
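
<!-- Worked example of the DEFAULT policy above: with replication r = 3 and
     n = 2 datanodes left in the pipeline after a failure, floor(r/2) = 1 is
     not greater than or equal to n, but r is greater than n, so a
     replacement datanode is added only if the block has been
     hflushed/appended. On a very small cluster, the enable property's
     description suggests opting out instead:

<property>
  <name>dfs.client.block.write.replace-datanode-on-failure.policy</name>
  <value>NEVER</value>
</property>
-->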

<property>
  <name>dfs.blockreport.intervalMsec</name>
  <value>21600000</value>
  <description>Determines block reporting interval in milliseconds.</description>
</property>

<property>
  <name>dfs.blockreport.initialDelay</name>
  <value>0</value>
  <description>Delay for first block report in seconds.</description>
</property>

<property>
  <name>dfs.datanode.directoryscan.interval</name>
  <value>21600</value>
  <description>Interval in seconds for Datanode to scan data directories and
    reconcile the difference between blocks in memory and on the disk.
  </description>
</property>

<property>
  <name>dfs.datanode.directoryscan.threads</name>
  <value>1</value>
  <description>The number of threads in the threadpool used to compile
    reports for volumes in parallel.
  </description>
</property>

<property>
  <name>dfs.heartbeat.interval</name>
  <value>3</value>
  <description>Determines datanode heartbeat interval in seconds.</description>
</property>

<property>
  <name>dfs.namenode.handler.count</name>
  <value>10</value>
  <description>The number of server threads for the namenode.</description>
</property>

<property>
  <name>dfs.namenode.safemode.threshold-pct</name>
  <value>0.999f</value>
  <description>
    Specifies the percentage of blocks that should satisfy
    the minimal replication requirement defined by dfs.namenode.replication.min.
    Values less than or equal to 0 mean not to wait for any particular
    percentage of blocks before exiting safemode.
    Values greater than 1 will make safe mode permanent.
  </description>
</property>

<property>
  <name>dfs.namenode.safemode.min.datanodes</name>
  <value>0</value>
  <description>
    Specifies the number of datanodes that must be considered alive
    before the name node exits safemode.
    Values less than or equal to 0 mean not to take the number of live
    datanodes into account when deciding whether to remain in safe mode
    during startup.
    Values greater than the number of datanodes in the cluster
    will make safe mode permanent.
  </description>
</property>

<property>
  <name>dfs.namenode.safemode.extension</name>
  <value>30000</value>
  <description>
    Determines extension of safe mode in milliseconds
    after the threshold level is reached.
  </description>
</property>

<property>
  <name>dfs.datanode.balance.bandwidthPerSec</name>
  <value>1048576</value>
  <description>
    Specifies the maximum amount of bandwidth that each datanode
    can utilize for balancing purposes, in terms of
    the number of bytes per second.
  </description>
</property>
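
<!-- Illustrative example (not a default): allowing the balancer to move up
     to 10 MB/s per datanode (10 * 1024 * 1024 = 10485760 bytes per second).

<property>
  <name>dfs.datanode.balance.bandwidthPerSec</name>
  <value>10485760</value>
</property>
-->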

<property>
  <name>dfs.hosts</name>
  <value></value>
  <description>Names a file that contains a list of hosts that are
    permitted to connect to the namenode. The full pathname of the file
    must be specified. If the value is empty, all hosts are
    permitted.</description>
</property>

<property>
  <name>dfs.hosts.exclude</name>
  <value></value>
  <description>Names a file that contains a list of hosts that are
    not permitted to connect to the namenode. The full pathname of the
    file must be specified. If the value is empty, no hosts are
    excluded.</description>
</property>
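
<!-- Illustrative example (not a default): pointing the include and exclude
     lists at files under a hypothetical configuration directory.

<property>
  <name>dfs.hosts</name>
  <value>/etc/hadoop/conf/dfs.hosts</value>
</property>
<property>
  <name>dfs.hosts.exclude</name>
  <value>/etc/hadoop/conf/dfs.hosts.exclude</value>
</property>
-->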

<property>
  <name>dfs.namenode.max.objects</name>
  <value>0</value>
  <description>The maximum number of files, directories and blocks
    dfs supports. A value of zero indicates no limit to the number
    of objects that dfs supports.
  </description>
</property>

<property>
  <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
  <value>true</value>
  <description>
    If true (the default), then the namenode requires that a connecting
    datanode's address must be resolved to a hostname. If necessary, a reverse
    DNS lookup is performed. All attempts to register a datanode from an
    unresolvable address are rejected.

    It is recommended that this setting be left on to prevent accidental
    registration of datanodes listed by hostname in the excludes file during a
    DNS outage. Only set this to false in environments where there is no
    infrastructure to support reverse DNS lookup.
  </description>
</property>

<property>
  <name>dfs.namenode.decommission.interval</name>
  <value>30</value>
  <description>Namenode periodicity in seconds to check if decommission is
    complete.</description>
</property>

<property>
  <name>dfs.namenode.decommission.nodes.per.interval</name>
  <value>5</value>
  <description>The number of nodes the namenode checks for completed
    decommission in each dfs.namenode.decommission.interval.</description>
</property>

<property>
  <name>dfs.namenode.replication.interval</name>
  <value>3</value>
  <description>The periodicity in seconds with which the namenode computes
    replication work for datanodes.</description>
</property>

<property>
  <name>dfs.namenode.accesstime.precision</name>
  <value>3600000</value>
  <description>The access time for an HDFS file is precise up to this value.
    The default value is 1 hour. Setting a value of 0 disables
    access times for HDFS.
  </description>
</property>

<property>
  <name>dfs.datanode.plugins</name>
  <value></value>
  <description>Comma-separated list of datanode plug-ins to be activated.
  </description>
</property>

<property>
  <name>dfs.namenode.plugins</name>
  <value></value>
  <description>Comma-separated list of namenode plug-ins to be activated.
  </description>
</property>

<property>
  <name>dfs.stream-buffer-size</name>
  <value>4096</value>
  <description>The size of the buffer used to stream files.
    The size of this buffer should probably be a multiple of hardware
    page size (4096 on Intel x86), and it determines how much data is
    buffered during read and write operations.</description>
</property>

<property>
  <name>dfs.bytes-per-checksum</name>
  <value>512</value>
  <description>The number of bytes per checksum. Must not be larger than
    dfs.stream-buffer-size</description>
</property>

<property>
  <name>dfs.client-write-packet-size</name>
  <value>65536</value>
  <description>Packet size for clients to write</description>
</property>

<property>
  <name>dfs.client.write.exclude.nodes.cache.expiry.interval.millis</name>
  <value>600000</value>
  <description>The maximum period to keep a DN in the excluded nodes list
    at a client. After this period, in milliseconds, the previously excluded
    node(s) will be removed automatically from the cache and will be
    considered good for block allocations again. Useful to lower or raise in
    situations where you keep a file open for very long periods (such as a
    Write-Ahead-Log (WAL) file) to make the writer tolerant to cluster
    maintenance restarts. Defaults to 10 minutes.</description>
</property>

<property>
  <name>dfs.namenode.checkpoint.dir</name>
  <value>file://${hadoop.tmp.dir}/dfs/namesecondary</value>
  <description>Determines where on the local filesystem the DFS secondary
    name node should store the temporary images to merge.
    If this is a comma-delimited list of directories then the image is
    replicated in all of the directories for redundancy.
  </description>
</property>

<property>
  <name>dfs.namenode.checkpoint.edits.dir</name>
  <value>${dfs.namenode.checkpoint.dir}</value>
  <description>Determines where on the local filesystem the DFS secondary
    name node should store the temporary edits to merge.
    If this is a comma-delimited list of directories then the edits are
    replicated in all of the directories for redundancy.
    Default value is same as dfs.namenode.checkpoint.dir
  </description>
</property>

<property>
  <name>dfs.namenode.checkpoint.period</name>
  <value>3600</value>
  <description>The number of seconds between two periodic checkpoints.
  </description>
</property>

<property>
  <name>dfs.namenode.checkpoint.txns</name>
  <value>1000000</value>
  <description>The Secondary NameNode or CheckpointNode will create a checkpoint
    of the namespace every 'dfs.namenode.checkpoint.txns' transactions, regardless
    of whether 'dfs.namenode.checkpoint.period' has expired.
  </description>
</property>

<property>
  <name>dfs.namenode.checkpoint.check.period</name>
  <value>60</value>
  <description>The SecondaryNameNode and CheckpointNode will poll the NameNode
    every 'dfs.namenode.checkpoint.check.period' seconds to query the number
    of uncheckpointed transactions.
  </description>
</property>
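
<!-- Illustrative example (not a default): checkpointing twice as often by
     halving the period; whichever of the period or the transaction count
     is reached first still triggers a checkpoint.

<property>
  <name>dfs.namenode.checkpoint.period</name>
  <value>1800</value>
</property>
-->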

<property>
  <name>dfs.namenode.checkpoint.max-retries</name>
  <value>3</value>
  <description>The SecondaryNameNode retries failed checkpointing. If the
    failure occurs while loading fsimage or replaying edits, the number of
    retries is limited by this variable.
  </description>
</property>

<property>
  <name>dfs.namenode.num.checkpoints.retained</name>
  <value>2</value>
  <description>The number of image checkpoint files that will be retained by
    the NameNode and Secondary NameNode in their storage directories. All edit
    logs necessary to recover an up-to-date namespace from the oldest retained
    checkpoint will also be retained.
  </description>
</property>

<property>
  <name>dfs.namenode.num.extra.edits.retained</name>
  <value>1000000</value>
  <description>The number of extra transactions which should be retained
    beyond what is minimally necessary for a NN restart. This can be useful for
    audit purposes or for an HA setup where a remote Standby Node may have
    been offline for some time and need to have a longer backlog of retained
    edits in order to start again.
    Typically each edit is on the order of a few hundred bytes, so the default
    of 1 million edits should be on the order of hundreds of MBs or low GBs.

    NOTE: Fewer extra edits may be retained than value specified for this setting
    if doing so would mean that more segments would be retained than the number
    configured by dfs.namenode.max.extra.edits.segments.retained.
  </description>
</property>

<property>
  <name>dfs.namenode.max.extra.edits.segments.retained</name>
  <value>10000</value>
  <description>The maximum number of extra edit log segments which should be retained
    beyond what is minimally necessary for a NN restart. When used in conjunction with
    dfs.namenode.num.extra.edits.retained, this configuration property serves to cap
    the number of extra edits files to a reasonable value.
  </description>
</property>

<property>
  <name>dfs.namenode.delegation.key.update-interval</name>
  <value>86400000</value>
  <description>The update interval for master key for delegation tokens
    in the namenode in milliseconds.
  </description>
</property>

<property>
  <name>dfs.namenode.delegation.token.max-lifetime</name>
  <value>604800000</value>
  <description>The maximum lifetime in milliseconds for which a delegation
    token is valid.
  </description>
</property>

<property>
  <name>dfs.namenode.delegation.token.renew-interval</name>
  <value>86400000</value>
  <description>The renewal interval for delegation token in milliseconds.
  </description>
</property>

<property>
  <name>dfs.datanode.failed.volumes.tolerated</name>
  <value>0</value>
  <description>The number of volumes that are allowed to
    fail before a datanode stops offering service. By default
    any volume failure will cause a datanode to shutdown.
  </description>
</property>

<property>
  <name>dfs.image.compress</name>
  <value>false</value>
  <description>Should the dfs image be compressed?
  </description>
</property>

<property>
  <name>dfs.image.compression.codec</name>
  <value>org.apache.hadoop.io.compress.DefaultCodec</value>
  <description>If the dfs image is compressed, how should it be compressed?
    This has to be a codec defined in io.compression.codecs.
  </description>
</property>

<property>
  <name>dfs.image.transfer.timeout</name>
  <value>600000</value>
  <description>
    Timeout for image transfer in milliseconds. This timeout and the related
    dfs.image.transfer.bandwidthPerSec parameter should be configured such
    that normal image transfer can complete within the timeout.
    This timeout prevents client hangs when the sender fails during
    image transfer, which is particularly important during checkpointing.
    Note that this timeout applies to the entirety of image transfer, and
    is not a socket timeout.
  </description>
</property>

<property>
  <name>dfs.image.transfer.bandwidthPerSec</name>
  <value>0</value>
  <description>
    Maximum bandwidth used for image transfer in bytes per second.
    This can help keep normal namenode operations responsive during
    checkpointing. The maximum bandwidth and timeout in
    dfs.image.transfer.timeout should be set such that normal image
    transfers can complete successfully.
    A default value of 0 indicates that throttling is disabled.
  </description>
</property>

<property>
  <name>dfs.namenode.support.allow.format</name>
  <value>true</value>
  <description>Does HDFS namenode allow itself to be formatted?
    You may consider setting this to false for any production
    cluster, to avoid any possibility of formatting a running DFS.
  </description>
</property>

<property>
  <name>dfs.datanode.max.transfer.threads</name>
  <value>4096</value>
  <description>
    Specifies the maximum number of threads to use for transferring data
    in and out of the DN.
  </description>
</property>

<property>
  <name>dfs.datanode.readahead.bytes</name>
  <value>4193404</value>
  <description>
    While reading block files, if the Hadoop native libraries are available,
    the datanode can use the posix_fadvise system call to explicitly
    page data into the operating system buffer cache ahead of the current
    reader's position. This can improve performance especially when
    disks are highly contended.

    This configuration specifies the number of bytes ahead of the current
    read position which the datanode will attempt to read ahead. This
    feature may be disabled by configuring this property to 0.

    If the native libraries are not available, this configuration has no
    effect.
  </description>
</property>

<property>
  <name>dfs.datanode.drop.cache.behind.reads</name>
  <value>false</value>
  <description>
    In some workloads, the data read from HDFS is known to be large enough
    that it is unlikely to be useful to cache it in the
    operating system buffer cache. In this case, the DataNode may be
    configured to automatically purge all data from the buffer cache
    after it is delivered to the client. This behavior is automatically
    disabled for workloads which read only short sections of a block
    (e.g. HBase random-IO workloads).

    This may improve performance for some workloads by freeing buffer
    cache space for more cacheable data.

    If the Hadoop native libraries are not available, this configuration
    has no effect.
  </description>
</property>

<property>
  <name>dfs.datanode.drop.cache.behind.writes</name>
  <value>false</value>
  <description>
    In some workloads, the data written to HDFS is known to be large enough
    that it is unlikely to be useful to cache it in the
    operating system buffer cache. In this case, the DataNode may be
    configured to automatically purge all data from the buffer cache
    after it is written to disk.

    This may improve performance for some workloads by freeing buffer
    cache space for more cacheable data.

    If the Hadoop native libraries are not available, this configuration
    has no effect.
  </description>
</property>

<property>
  <name>dfs.datanode.sync.behind.writes</name>
  <value>false</value>
  <description>
    If this configuration is enabled, the datanode will instruct the
    operating system to enqueue all written data to the disk immediately
    after it is written. This differs from the usual OS policy which
    may wait for up to 30 seconds before triggering writeback.

    This may improve performance for some workloads by smoothing the
    IO profile for data written to disk.

    If the Hadoop native libraries are not available, this configuration
    has no effect.
  </description>
</property>

<property>
  <name>dfs.client.failover.max.attempts</name>
  <value>15</value>
  <description>
    Expert only. The number of client failover attempts that should be
    made before the failover is considered failed.
  </description>
</property>

<property>
  <name>dfs.client.failover.sleep.base.millis</name>
  <value>500</value>
  <description>
    Expert only. The time to wait, in milliseconds, between failover
    attempts increases exponentially as a function of the number of
    attempts made so far, with a random factor of +/- 50%. This option
    specifies the base value used in the failover calculation. The
    first failover will retry immediately. The 2nd failover attempt
    will delay at least dfs.client.failover.sleep.base.millis
    milliseconds. And so on.
  </description>
</property>

<property>
  <name>dfs.client.failover.sleep.max.millis</name>
  <value>15000</value>
  <description>
    Expert only. The time to wait, in milliseconds, between failover
    attempts increases exponentially as a function of the number of
    attempts made so far, with a random factor of +/- 50%. This option
    specifies the maximum value to wait between failovers.
    Specifically, the time between two failover attempts will not
    exceed +/- 50% of dfs.client.failover.sleep.max.millis
    milliseconds.
  </description>
</property>

<property>
  <name>dfs.client.failover.connection.retries</name>
  <value>0</value>
  <description>
    Expert only. Indicates the number of retries a failover IPC client
    will make to establish a server connection.
  </description>
</property>

<property>
  <name>dfs.client.failover.connection.retries.on.timeouts</name>
  <value>0</value>
  <description>
    Expert only. The number of retry attempts a failover IPC client
    will make on socket timeout when establishing a server connection.
  </description>
</property>

<property>
  <name>dfs.nameservices</name>
  <value></value>
  <description>
    Comma-separated list of nameservices.
  </description>
</property>

<property>
  <name>dfs.nameservice.id</name>
  <value></value>
  <description>
    The ID of this nameservice. If the nameservice ID is not
    configured or more than one nameservice is configured for
    dfs.nameservices it is determined automatically by
    matching the local node's address with the configured address.
  </description>
</property>

<property>
  <name>dfs.ha.namenodes.EXAMPLENAMESERVICE</name>
  <value></value>
  <description>
    The prefix for a given nameservice; it contains a comma-separated
    list of namenodes for that nameservice (e.g. EXAMPLENAMESERVICE).
  </description>
</property>

<property>
  <name>dfs.ha.namenode.id</name>
  <value></value>
  <description>
    The ID of this namenode. If the namenode ID is not configured it
    is determined automatically by matching the local node's address
    with the configured address.
  </description>
</property>
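
<!-- Illustrative example (not a default): the minimal HA naming for a
     hypothetical nameservice "ns1" with two namenodes, tying together
     dfs.nameservices and the dfs.ha.namenodes.* prefix described above.

<property>
  <name>dfs.nameservices</name>
  <value>ns1</value>
</property>
<property>
  <name>dfs.ha.namenodes.ns1</name>
  <value>nn1,nn2</value>
</property>
-->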

<property>
  <name>dfs.ha.log-roll.period</name>
  <value>120</value>
  <description>
    How often, in seconds, the StandbyNode should ask the active to
    roll edit logs. Since the StandbyNode only reads from finalized
    log segments, the StandbyNode will only be as up-to-date as how
    often the logs are rolled. Note that failover triggers a log roll
    so the StandbyNode will be up to date before it becomes active.
  </description>
</property>

<property>
  <name>dfs.ha.tail-edits.period</name>
  <value>60</value>
  <description>
    How often, in seconds, the StandbyNode should check for new
    finalized log segments in the shared edits log.
  </description>
</property>

<property>
  <name>dfs.ha.automatic-failover.enabled</name>
  <value>false</value>
  <description>
    Whether automatic failover is enabled. See the HDFS High
    Availability documentation for details on automatic HA
    configuration.
  </description>
</property>

<property>
  <name>dfs.support.append</name>
  <value>true</value>
  <description>
    Does HDFS allow appends to files?
  </description>
</property>

<property>
  <name>dfs.client.use.datanode.hostname</name>
  <value>false</value>
  <description>Whether clients should use datanode hostnames when
    connecting to datanodes.
  </description>
</property>

<property>
  <name>dfs.datanode.use.datanode.hostname</name>
  <value>false</value>
  <description>Whether datanodes should use datanode hostnames when
    connecting to other datanodes for data transfer.
  </description>
</property>

<property>
  <name>dfs.client.local.interfaces</name>
  <value></value>
  <description>A comma separated list of network interface names to use
    for data transfer between the client and datanodes. When creating
    a connection to read from or write to a datanode, the client
    chooses one of the specified interfaces at random and binds its
    socket to the IP of that interface. Individual names may be
    specified as either an interface name (eg "eth0"), a subinterface
    name (eg "eth0:0"), or an IP address (which may be specified using
    CIDR notation to match a range of IPs).
  </description>
</property>
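
<!-- Illustrative example (not a default): binding client data-transfer
     sockets to a named interface or, alternatively, to an address range in
     CIDR notation. The interface name and subnet are hypothetical.

<property>
  <name>dfs.client.local.interfaces</name>
  <value>eth0,192.168.0.0/24</value>
</property>
-->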

<property>
  <name>dfs.namenode.kerberos.internal.spnego.principal</name>
  <value>${dfs.web.authentication.kerberos.principal}</value>
</property>

<property>
  <name>dfs.secondary.namenode.kerberos.internal.spnego.principal</name>
  <value>${dfs.web.authentication.kerberos.principal}</value>
</property>

<property>
  <name>dfs.namenode.avoid.read.stale.datanode</name>
  <value>false</value>
  <description>
    Indicate whether or not to avoid reading from "stale" datanodes whose
    heartbeat messages have not been received by the namenode
    for more than a specified time interval. Stale datanodes will be
    moved to the end of the node list returned for reading. See
    dfs.namenode.avoid.write.stale.datanode for a similar setting for writes.
  </description>
</property>

<property>
  <name>dfs.namenode.avoid.write.stale.datanode</name>
  <value>false</value>
  <description>
    Indicate whether or not to avoid writing to "stale" datanodes whose
    heartbeat messages have not been received by the namenode
    for more than a specified time interval. Writes will avoid using
    stale datanodes unless more than a configured ratio
    (dfs.namenode.write.stale.datanode.ratio) of datanodes are marked as
    stale. See dfs.namenode.avoid.read.stale.datanode for a similar setting
    for reads.
  </description>
</property>

<property>
  <name>dfs.namenode.stale.datanode.interval</name>
  <value>30000</value>
  <description>
    Default time interval for marking a datanode as "stale", i.e., if
    the namenode has not received heartbeat messages from a datanode for
    more than this time interval, the datanode will be marked and treated
    as "stale" by default. The stale interval cannot be too small since
    otherwise this may cause too frequent change of stale states.
    We thus set a minimum stale interval value (the default value is 3 times
    the heartbeat interval) and guarantee that the stale interval cannot be less
    than the minimum value. A stale data node is avoided during lease/block
    recovery. It can be conditionally avoided for reads (see
    dfs.namenode.avoid.read.stale.datanode) and for writes (see
    dfs.namenode.avoid.write.stale.datanode).
  </description>
</property>

<property>
  <name>dfs.namenode.write.stale.datanode.ratio</name>
  <value>0.5f</value>
  <description>
    When the ratio of stale datanodes to total datanodes is greater than
    this ratio, stop avoiding writing to stale nodes so as to prevent
    causing hotspots.
  </description>
</property>

<property>
  <name>dfs.namenode.invalidate.work.pct.per.iteration</name>
  <value>0.32f</value>
  <description>
    *Note*: Advanced property. Change with caution.
    This determines the percentage amount of block
    invalidations (deletes) to do over a single DN heartbeat
    deletion command. The final deletion count is determined by applying this
    percentage to the number of live nodes in the system.
    The resultant number is the number of blocks from the deletion list
    chosen for proper invalidation over a single heartbeat of a single DN.
    Value should be a positive, non-zero percentage in float notation (X.Yf),
    with 1.0f meaning 100%.
  </description>
</property>

<property>
  <name>dfs.namenode.replication.work.multiplier.per.iteration</name>
  <value>2</value>
  <description>
    *Note*: Advanced property. Change with caution.
    This determines the total amount of block transfers to begin in
    parallel at a DN, for replication, when such a command list is being
    sent over a DN heartbeat by the NN. The actual number is obtained by
    multiplying this multiplier with the total number of live nodes in the
    cluster. The result number is the number of blocks to begin transfers
    immediately for, per DN heartbeat. This number can be any positive,
    non-zero integer.
  </description>
</property>

<property>
  <name>dfs.webhdfs.enabled</name>
  <value>true</value>
  <description>
    Enable WebHDFS (REST API) in Namenodes and Datanodes.
  </description>
</property>

<property>
  <name>hadoop.fuse.connection.timeout</name>
  <value>300</value>
  <description>
    The minimum number of seconds that we'll cache libhdfs connection objects
    in fuse_dfs. Lower values will result in lower memory consumption; higher
    values may speed up access by avoiding the overhead of creating new
    connection objects.
  </description>
</property>

<property>
  <name>hadoop.fuse.timer.period</name>
  <value>5</value>
  <description>
    The number of seconds between cache expiry checks in fuse_dfs. Lower values
    will result in fuse_dfs noticing changes to Kerberos ticket caches more
    quickly.
  </description>
</property>

<property>
  <name>dfs.metrics.percentiles.intervals</name>
  <value></value>
  <description>
    Comma-delimited set of integers denoting the desired rollover intervals
    (in seconds) for percentile latency metrics on the Namenode and Datanode.
    By default, percentile latency metrics are disabled.
  </description>
</property>
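
<!-- Illustrative example (not a default): collecting percentile latency
     metrics over rolling 1-minute and 10-minute windows.

<property>
  <name>dfs.metrics.percentiles.intervals</name>
  <value>60,600</value>
</property>
-->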

<property>
  <name>dfs.encrypt.data.transfer</name>
  <value>false</value>
  <description>
    Whether or not actual block data that is read/written from/to HDFS should
    be encrypted on the wire. This only needs to be set on the NN and DNs,
    clients will deduce this automatically.
  </description>
</property>

<property>
  <name>dfs.encrypt.data.transfer.algorithm</name>
  <value></value>
  <description>
    This value may be set to either "3des" or "rc4". If nothing is set, then
    the configured JCE default on the system is used (usually 3DES.) It is
    widely believed that 3DES is more cryptographically secure, but RC4 is
    substantially faster.
  </description>
</property>

<property>
  <name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
  <value>false</value>
  <description>
    Boolean which enables backend datanode-side support for the experimental
    DistributedFileSystem#getFileBlockStorageLocations API.
  </description>
</property>

<property>
  <name>dfs.client.file-block-storage-locations.num-threads</name>
  <value>10</value>
  <description>
    Number of threads used for making parallel RPCs in
    DistributedFileSystem#getFileBlockStorageLocations().
  </description>
</property>

<property>
  <name>dfs.client.file-block-storage-locations.timeout</name>
  <value>60</value>
  <description>
    Timeout (in seconds) for the parallel RPCs made in
    DistributedFileSystem#getFileBlockStorageLocations().
  </description>
</property>

<property>
  <name>dfs.journalnode.rpc-address</name>
  <value>0.0.0.0:8485</value>
  <description>
    The JournalNode RPC server address and port.
  </description>
</property>

<property>
  <name>dfs.journalnode.http-address</name>
  <value>0.0.0.0:8480</value>
  <description>
    The address and port the JournalNode web UI listens on.
    If the port is 0 then the server will start on a free port.
  </description>
</property>

<property>
  <name>dfs.namenode.audit.loggers</name>
  <value>default</value>
  <description>
    List of classes implementing audit loggers that will receive audit events.
    These should be implementations of org.apache.hadoop.hdfs.server.namenode.AuditLogger.
    The special value "default" can be used to reference the default audit
    logger, which uses the configured log system. Installing custom audit loggers
    may affect the performance and stability of the NameNode. Refer to the custom
    logger's documentation for more details.
  </description>
</property>

<property>
  <name>dfs.domain.socket.path</name>
  <value></value>
  <description>
    Optional. This is a path to a UNIX domain socket that will be used for
    communication between the DataNode and local HDFS clients.
    If the string "_PORT" is present in this path, it will be replaced by the
    TCP port of the DataNode.
  </description>
</property>
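
<!-- Illustrative example (not a default): a socket path under a hypothetical
     runtime directory; the "_PORT" token is substituted with the DataNode's
     TCP port.

<property>
  <name>dfs.domain.socket.path</name>
  <value>/var/run/hadoop-hdfs/dn._PORT</value>
</property>
-->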

<property>
  <name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-threshold</name>
  <value>10737418240</value> <!-- 10 GB -->
  <description>
    Only used when the dfs.datanode.fsdataset.volume.choosing.policy is set to
    org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy.
    This setting controls how much DN volumes are allowed to differ in terms of
    bytes of free disk space before they are considered imbalanced. If the free
    space of all the volumes is within this range of each other, the volumes
    will be considered balanced and block assignments will be done on a pure
    round robin basis.
  </description>
</property>

<property>
  <name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-preference-fraction</name>
  <value>0.75f</value>
  <description>
    Only used when the dfs.datanode.fsdataset.volume.choosing.policy is set to
    org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy.
    This setting controls what percentage of new block allocations will be sent
    to volumes with more available disk space than others. This setting should
    be in the range 0.0 - 1.0, though in practice 0.5 - 1.0, since there should
    be no reason to prefer that volumes with less available disk space receive
    more block allocations.
  </description>
</property>
|
|
|
|
|
2013-06-08 23:28:37 +00:00
|
|
|
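<!-- For illustration only: a hypothetical hdfs-site.xml fragment that enables
     the available-space policy on DataNodes with mixed-size disks. The policy
     class name is the one referenced above; the tuned values are illustrative
     (5368709120 bytes is 5 GB):

  <property>
    <name>dfs.datanode.fsdataset.volume.choosing.policy</name>
    <value>org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy</value>
  </property>
  <property>
    <name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-threshold</name>
    <value>5368709120</value>
  </property>
  <property>
    <name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-preference-fraction</name>
    <value>0.85f</value>
  </property>
-->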
<property>
  <name>dfs.namenode.edits.noeditlogchannelflush</name>
  <value>false</value>
  <description>
    Specifies whether to skip flushing the edit log file channel. When set
    to true, expensive FileChannel#force calls are skipped and synchronous
    disk writes are enabled instead by opening the edit log file with
    RandomAccessFile("rws") flags. This can significantly improve the
    performance of edit log writes on the Windows platform.
    Note that the behavior of the "rws" flags is platform and hardware
    specific and might not provide the same level of guarantees as
    FileChannel#force. For example, the write will skip the disk-cache on SAS
    and SCSI devices while it might not on SATA devices. This is an
    expert-level setting; change it with caution.
  </description>
</property>

<property>
  <name>dfs.client.cache.drop.behind.writes</name>
  <value></value>
  <description>
    Just like dfs.datanode.drop.cache.behind.writes, this setting causes the
    page cache to be dropped behind HDFS writes, potentially freeing up more
    memory for other uses. Unlike dfs.datanode.drop.cache.behind.writes, this
    is a client-side setting rather than a setting for the entire datanode.
    If present, this setting will override the DataNode default.

    If the native libraries are not available to the DataNode, this
    configuration has no effect.
  </description>
</property>

<property>
  <name>dfs.client.cache.drop.behind.reads</name>
  <value></value>
  <description>
    Just like dfs.datanode.drop.cache.behind.reads, this setting causes the
    page cache to be dropped behind HDFS reads, potentially freeing up more
    memory for other uses. Unlike dfs.datanode.drop.cache.behind.reads, this
    is a client-side setting rather than a setting for the entire datanode. If
    present, this setting will override the DataNode default.

    If the native libraries are not available to the DataNode, this
    configuration has no effect.
  </description>
</property>

<property>
  <name>dfs.client.cache.readahead</name>
  <value></value>
  <description>
    Just like dfs.datanode.readahead.bytes, this setting causes the datanode to
    read ahead in the block file using posix_fadvise, potentially decreasing
    I/O wait times. Unlike dfs.datanode.readahead.bytes, this is a client-side
    setting rather than a setting for the entire datanode. If present, this
    setting will override the DataNode default.

    If the native libraries are not available to the DataNode, this
    configuration has no effect.
  </description>
</property>
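<!-- For illustration only: a hypothetical client-side hdfs-site.xml fragment
     for a workload that streams large files once and never re-reads them.
     The values are illustrative; readahead is in bytes (4194304 bytes is
     4 MB):

  <property>
    <name>dfs.client.cache.drop.behind.writes</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.client.cache.drop.behind.reads</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.client.cache.readahead</name>
    <value>4194304</value>
  </property>
-->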
<property>
  <name>dfs.namenode.enable.retrycache</name>
  <value>true</value>
  <description>
    This enables the retry cache on the namenode. The namenode tracks the
    corresponding response for each non-idempotent request. If a client
    retries the request, the response from the retry cache is sent. Such
    operations are tagged with the annotation @AtMostOnce in namenode
    protocols. It is recommended that this flag be set to true. Setting it to
    false will result in clients getting failure responses to retried
    requests. This flag must be enabled in an HA setup for transparent
    failovers.

    The entries in the cache have an expiration time configurable
    using dfs.namenode.retrycache.expirytime.millis.
  </description>
</property>

<property>
  <name>dfs.namenode.retrycache.expirytime.millis</name>
  <value>600000</value>
  <description>
    The time for which retry cache entries are retained.
  </description>
</property>

<property>
  <name>dfs.namenode.retrycache.heap.percent</name>
  <value>0.03f</value>
  <description>
    This parameter configures the fraction of the namenode java heap allocated
    for the retry cache (excluding the cached responses). This corresponds to
    approximately 4096 entries for every 64MB of namenode process java heap
    size. Assuming a retry cache entry expiration time (configured using
    dfs.namenode.retrycache.expirytime.millis) of 10 minutes, this enables the
    retry cache to support 7 operations per second sustained for 10 minutes.
    As the heap size is increased, the operation rate linearly increases.
  </description>
</property>
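<!-- Worked example of the sizing above (illustrative arithmetic, not a tuning
     recommendation): with the default 0.03f, a namenode with a 1 GB java heap
     reserves about 30 MB for retry cache entries. At roughly 4096 entries per
     64 MB of heap, 1 GB of heap supports about 65536 entries; with the
     default 10 minute (600 second) expiry, that sustains roughly 65536 / 600,
     or about 109 retried operations per second.
-->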
<property>
  <name>dfs.client.mmap.cache.size</name>
  <value>1024</value>
  <description>
    When zero-copy reads are used, the DFSClient keeps a cache of recently used
    memory-mapped regions. This parameter controls the maximum number of
    entries that we will keep in that cache.

    If this is set to 0, we will not allow mmap.

    The larger this number is, the more file descriptors we will potentially
    use for memory-mapped files. mmapped files also use virtual address space.
    You may need to increase your ulimit virtual address space limits before
    increasing the client mmap cache size.
  </description>
</property>

<property>
  <name>dfs.client.mmap.cache.timeout.ms</name>
  <value>900000</value>
  <description>
    The minimum length of time that we will keep an mmap entry in the cache
    between uses. If an entry is in the cache longer than this, and nobody
    uses it, it will be removed by a background thread.
  </description>
</property>
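<!-- For illustration only: a hypothetical client-side override that shrinks
     the zero-copy mmap cache and expires idle entries after 5 minutes, for
     clients running with tight virtual address space limits. The values are
     illustrative:

  <property>
    <name>dfs.client.mmap.cache.size</name>
    <value>256</value>
  </property>
  <property>
    <name>dfs.client.mmap.cache.timeout.ms</name>
    <value>300000</value>
  </property>
-->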
<property>
  <name>dfs.namenode.caching.enabled</name>
  <value>false</value>
  <description>
    Set to true to enable block caching. This flag enables the NameNode to
    maintain a mapping of cached blocks to DataNodes by processing DataNode
    cache reports. Based on these reports and the addition and removal of
    caching directives, the NameNode will schedule caching and uncaching work.
  </description>
</property>

<property>
  <name>dfs.datanode.max.locked.memory</name>
  <value>0</value>
  <description>
    The amount of memory in bytes to use for caching of block replicas in
    memory on the datanode. The datanode's maximum locked memory soft ulimit
    (RLIMIT_MEMLOCK) must be set to at least this value, else the datanode
    will abort on startup.

    By default, this parameter is set to 0, which disables in-memory caching.

    If the native libraries are not available to the DataNode, this
    configuration has no effect.
  </description>
</property>
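<!-- For illustration only: a hypothetical fragment that turns on centralized
     cache management with 1 GB of locked memory per DataNode (1073741824
     bytes is 1 GB). The DataNode user's locked-memory ulimit (ulimit -l) must
     be raised to at least the same value, and cache pools and directives are
     then created with the hdfs cacheadmin command (for example, its -addPool
     and -addDirective subcommands):

  <property>
    <name>dfs.namenode.caching.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.datanode.max.locked.memory</name>
    <value>1073741824</value>
  </property>
-->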
<property>
  <name>dfs.namenode.list.cache.directives.num.responses</name>
  <value>100</value>
  <description>
    This value controls the number of cache directives that the NameNode will
    send over the wire in response to a listDirectives RPC.
  </description>
</property>

<property>
  <name>dfs.namenode.list.cache.pools.num.responses</name>
  <value>100</value>
  <description>
    This value controls the number of cache pools that the NameNode will
    send over the wire in response to a listPools RPC.
  </description>
</property>

<property>
  <name>dfs.namenode.path.based.cache.refresh.interval.ms</name>
  <value>300000</value>
  <description>
    The number of milliseconds between successive path cache rescans. Path
    cache rescans are when we calculate which blocks should be cached, and on
    what datanodes.

    By default, this parameter is set to 300000, which is five minutes.
  </description>
</property>

<property>
  <name>dfs.namenode.path.based.cache.retry.interval.ms</name>
  <value>60000</value>
  <description>
    When the NameNode needs to uncache something that is cached, or cache
    something that is not cached, it must direct the DataNodes to do so by
    sending a DNA_CACHE or DNA_UNCACHE command in response to a DataNode
    heartbeat. This parameter controls how frequently the NameNode will
    resend these commands.
  </description>
</property>

<property>
  <name>dfs.datanode.fsdatasetcache.max.threads.per.volume</name>
  <value>4</value>
  <description>
    The maximum number of threads per volume to use for caching new data
    on the datanode. These threads consume both I/O and CPU. This can affect
    normal datanode operations.
  </description>
</property>

<property>
  <name>dfs.cachereport.intervalMsec</name>
  <value>10000</value>
  <description>
    Determines the cache reporting interval in milliseconds. After this amount
    of time, the DataNode sends a full report of its cache state to the
    NameNode. The NameNode uses the cache report to update its map of cached
    blocks to DataNode locations.

    This configuration has no effect if in-memory caching has been disabled by
    setting dfs.datanode.max.locked.memory to 0 (which is the default).

    If the native libraries are not available to the DataNode, this
    configuration has no effect.
  </description>
</property>
<property>
  <name>dfs.namenode.edit.log.autoroll.multiplier.threshold</name>
  <value>2.0</value>
  <description>
    Determines when an active namenode will roll its own edit log.
    The actual threshold (in number of edits) is determined by multiplying
    this value by dfs.namenode.checkpoint.txns.

    This prevents extremely large edit files from accumulating on the active
    namenode, which can cause timeouts during namenode startup and pose an
    administrative hassle. This behavior is intended as a failsafe for when
    the standby or secondary namenode fails to roll the edit log by the normal
    checkpoint threshold.
  </description>
</property>
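<!-- Worked example of the threshold above (illustrative arithmetic): with the
     default dfs.namenode.checkpoint.txns of 1000000 transactions and the
     default multiplier of 2.0, the active namenode rolls its own edit log
     once 2.0 x 1000000 = 2000000 edits accumulate without a checkpoint-driven
     roll.
-->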
<property>
  <name>dfs.namenode.edit.log.autoroll.check.interval.ms</name>
  <value>300000</value>
  <description>
    How often an active namenode will check if it needs to roll its edit log,
    in milliseconds.
  </description>
</property>
</configuration>