From 031f980e7e92a7deb2c0469cf57bb375b2ca5322 Mon Sep 17 00:00:00 2001
From: Allen Wittenauer
Date: Mon, 1 Dec 2014 16:21:20 -0800
Subject: [PATCH] HADOOP-11081. Document hadoop properties expected to be set
 by the shell code in *-env.sh (aw)

---
 .../hadoop-common/CHANGES.txt                 |   3 +
 .../hadoop-common/src/main/conf/hadoop-env.sh | 112 ++++++++++++------
 .../hadoop-hdfs/src/main/bin/hdfs             |   2 +
 .../hadoop-hdfs/src/main/bin/hdfs-config.sh   |   7 +-
 hadoop-mapreduce-project/conf/mapred-env.sh   |   8 +-
 .../hadoop-yarn/conf/yarn-env.sh              |  19 +--
 6 files changed, 99 insertions(+), 52 deletions(-)

diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 59ccba06b7..8a544c1bed 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -145,6 +145,9 @@ Trunk (Unreleased)
 
     HADOOP-10926. Improve smart-apply-patch.sh to apply binary diffs (cmccabe)
 
+    HADOOP-11081. Document hadoop properties expected to be set by the shell
+    code in *-env.sh (aw)
+
   BUG FIXES
 
     HADOOP-9451. Fault single-layer config if node group topology is enabled.
diff --git a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh
index af8e54492e..ed9382b345 100644
--- a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh
+++ b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh
@@ -37,31 +37,38 @@
 # JAVA_HOME=/usr/java/testing hdfs dfs -ls
 #
 # Therefore, the vast majority (BUT NOT ALL!) of these defaults
-# are configured for substitution and not append. If you would
-# like append, you'll # need to modify this file accordingly.
+# are configured for substitution and not append. If append
+# is preferable, modify this file accordingly.
 
 ###
 # Generic settings for HADOOP
 ###
 
 # Technically, the only required environment variable is JAVA_HOME.
-# All others are optional. However, our defaults are probably not
-# your defaults. Many sites configure these options outside of Hadoop,
+# All others are optional. However, the defaults are probably not
+# preferred. Many sites configure these options outside of Hadoop,
 # such as in /etc/profile.d
 
-# The java implementation to use.
-export JAVA_HOME=${JAVA_HOME:-"hadoop-env.sh is not configured"}
+# The java implementation to use. By default, this environment
+# variable is REQUIRED on ALL platforms except OS X!
+# export JAVA_HOME=
+
+# Location of Hadoop. By default, Hadoop will attempt to determine
+# this location based upon its execution path.
+# export HADOOP_PREFIX=
 
 # Location of Hadoop's configuration information. i.e., where this
-# file is probably living. You will almost certainly want to set
-# this in /etc/profile.d or equivalent.
+# file is probably living. Many sites will also set this in the
+# same location where JAVA_HOME is defined. If this is not defined,
+# Hadoop will attempt to locate it based upon its execution
+# path.
 # export HADOOP_CONF_DIR=$HADOOP_PREFIX/etc/hadoop
 
 # The maximum amount of heap to use, in MB. Default is 1024.
 # export HADOOP_HEAPSIZE=1024
 
 # Extra Java runtime options for all Hadoop commands. We don't support
-# IPv6 yet/still, so by default we set preference to IPv4.
+# IPv6 yet/still, so by default the preference is set to IPv4.
 # export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true"
 
 # Some parts of the shell code may do special things dependent upon
@@ -72,8 +79,8 @@ export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)}
 
 # Under certain conditions, Java on OS X will throw SCDynamicStore errors
 # in the system logs.
-# See HADOOP-8719 for more information. If you need Kerberos
-# support on OS X, you'll want to change/remove this extra bit.
+# See HADOOP-8719 for more information. If one needs Kerberos
+# support on OS X, one will want to change/remove this extra bit.
 case ${HADOOP_OS_TYPE} in
   Darwin*)
     export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.realm= "
@@ -82,11 +89,11 @@ case ${HADOOP_OS_TYPE} in
   ;;
 esac
 
-# Extra Java runtime options for Hadoop clients (i.e., hdfs dfs -blah)
-# These get added to HADOOP_OPTS for such commands. In most cases,
-# this should be left empty and let users supply it on the
-# command line.
-# extra HADOOP_CLIENT_OPTS=""
+# Extra Java runtime options for some Hadoop commands
+# and clients (i.e., hdfs dfs -blah). These get appended to HADOOP_OPTS for
+# such commands. In most cases, this should be left empty and
+# let users supply it on the command line.
+# export HADOOP_CLIENT_OPTS=""
 
 #
 # A note about classpaths.
@@ -149,20 +156,22 @@
 
 #
 #
-# You can define variables right here and then re-use them later on.
-# For example, it is common to use the same garbage collection settings
-# for all the daemons. So we could define:
+# Many options may also be specified as Java properties. It is
+# very common, and in many cases, desirable, to hard-set these
+# in daemon _OPTS variables. Where applicable, the appropriate
+# Java property is also identified. Note that many are re-used
+# or set differently in certain contexts (e.g., secure vs
+# non-secure).
 #
-# export HADOOP_GC_SETTINGS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
-#
-# .. and then use it as per the b option under the namenode.
 
-# Where (primarily) daemon log files are stored.
-# $HADOOP_PREFIX/logs by default.
+# Where (primarily) daemon log files are stored. $HADOOP_PREFIX/logs
+# by default.
+# Java property: hadoop.log.dir
 # export HADOOP_LOG_DIR=${HADOOP_PREFIX}/logs
 
 # A string representing this instance of hadoop. $USER by default.
 # This is used in writing log and pid files, so keep that in mind!
+# Java property: hadoop.id.str
 # export HADOOP_IDENT_STRING=$USER
 
 # How many seconds to pause after stopping a daemon
@@ -171,23 +180,26 @@
 # Where pid files are stored. /tmp by default.
 # export HADOOP_PID_DIR=/tmp
 
-# Default log level and output location
-# This sets the hadoop.root.logger property
+# Default log4j setting for interactive commands
+# Java property: hadoop.root.logger
 # export HADOOP_ROOT_LOGGER=INFO,console
 
-# Default log level for daemons spawned explicitly by
+# Default log4j setting for daemons spawned explicitly by
 # --daemon option of hadoop, hdfs, mapred and yarn command.
-# This sets the hadoop.root.logger property
+# Java property: hadoop.root.logger
 # export HADOOP_DAEMON_ROOT_LOGGER=INFO,RFA
 
 # Default log level and output location for security-related messages.
-# It sets -Dhadoop.security.logger on the command line.
-# You will almost certainly want to change this on a per-daemon basis!
+# You will almost certainly want to change this on a per-daemon basis via
+# the Java property (i.e., -Dhadoop.security.logger=foo). (Note that the
+# defaults for the NN and 2NN override this.)
+# Java property: hadoop.security.logger
 # export HADOOP_SECURITY_LOGGER=INFO,NullAppender
 
 # Default log level for file system audit messages.
-# It sets -Dhdfs.audit.logger on the command line.
-# You will almost certainly want to change this on a per-daemon basis!
+# Generally, this is specifically set in the namenode-specific
+# options line.
+# Java property: hdfs.audit.logger
 # export HADOOP_AUDIT_LOGGER=INFO,NullAppender
 
 # Default process priority level
@@ -195,8 +207,19 @@ esac
 # export HADOOP_NICENESS=0
 
 # Default name for the service level authorization file
+# Java property: hadoop.policy.file
 # export HADOOP_POLICYFILE="hadoop-policy.xml"
+
+#
+# NOTE: this is not used by default! <-----
+# You can define variables right here and then re-use them later on.
+# For example, it is common to use the same garbage collection settings
+# for all the daemons. So one could define:
+#
+# export HADOOP_GC_SETTINGS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
+#
+# .. and then use it as per the b option under the namenode.
 
 ###
 # Secure/privileged execution
 ###
@@ -219,18 +242,26 @@
 #
 # This directory contains the logs for secure and privileged processes.
+# Java property: hadoop.log.dir
 # export HADOOP_SECURE_LOG=${HADOOP_LOG_DIR}
 
 #
 # When running a secure daemon, the default value of HADOOP_IDENT_STRING
 # ends up being a bit bogus. Therefore, by default, the code will
-# replace HADOOP_IDENT_STRING with HADOOP_SECURE_xx_USER. If you want
+# replace HADOOP_IDENT_STRING with HADOOP_SECURE_xx_USER. If one wants
 # to keep HADOOP_IDENT_STRING untouched, then uncomment this line.
 # export HADOOP_SECURE_IDENT_PRESERVE="true"
 
 ###
 # NameNode specific parameters
 ###
+
+# Default log level and output location for file system related change
+# messages. For non-namenode daemons, the Java property must be set in
+# the appropriate _OPTS if one wants something other than INFO,NullAppender.
+# Java property: hdfs.audit.logger
+# export HDFS_AUDIT_LOGGER=INFO,NullAppender
+
 # Specify the JVM options to be used when starting the NameNode.
 # These options will be appended to the options specified as HADOOP_OPTS
 # and therefore may override any similar flags set in HADOOP_OPTS
 #
@@ -245,7 +276,7 @@
 # export HADOOP_NAMENODE_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"
 
 # this is the default:
-# export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender"
+# export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS"
 
 ###
 # SecondaryNameNode specific parameters
@@ -255,7 +286,7 @@
 # and therefore may override any similar flags set in HADOOP_OPTS
 #
 # This is the default:
-# export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender"
+# export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS"
 
 ###
 # DataNode specific parameters
@@ -272,14 +303,16 @@
 # to provide authentication of data transfer protocol. This **MUST NOT** be
 # defined if SASL is configured for authentication of data transfer protocol
 # using non-privileged ports.
+# This will replace the hadoop.id.str Java property in secure mode.
 # export HADOOP_SECURE_DN_USER=hdfs
 
 # Supplemental options for secure datanodes
-# By default, we use jsvc which needs to know to launch a
+# By default, Hadoop uses jsvc which needs to know to launch a
 # server jvm.
 # export HADOOP_DN_SECURE_EXTRA_OPTS="-jvm server"
 
 # Where datanode log files are stored in the secure data environment.
+# This will replace the hadoop.log.dir Java property in secure mode.
 # export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_SECURE_LOG_DIR}
 
 # Where datanode pid files are stored in the secure data environment.
@@ -301,11 +334,12 @@
 # export HADOOP_PORTMAP_OPTS="-Xmx512m"
 
 # Supplemental options for priviliged gateways
-# By default, we use jsvc which needs to know to launch a
+# By default, Hadoop uses jsvc which needs to know to launch a
 # server jvm.
 # export HADOOP_NFS3_SECURE_EXTRA_OPTS="-jvm server"
 
 # On privileged gateways, user to run the gateway as after dropping privileges
+# This will replace the hadoop.id.str Java property in secure mode.
 # export HADOOP_PRIVILEGED_NFS_USER=nfsserver
 
 ###
@@ -349,7 +383,7 @@
 ###
 
 #
-# When building Hadoop, you can add the class paths to your commands
+# When building Hadoop, one can add the class paths to the commands
 # via this special env var:
-# HADOOP_ENABLE_BUILD_PATHS="true"
+# export HADOOP_ENABLE_BUILD_PATHS="true"
 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs
index 8ff0ee61f6..ee666f3a25 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs
@@ -180,6 +180,7 @@ case ${COMMAND} in
     CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode'
     hadoop_debug "Appending HADOOP_NAMENODE_OPTS onto HADOOP_OPTS"
     HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NAMENODE_OPTS}"
+    hadoop_add_param HADOOP_OPTS hdfs.audit.logger "-Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER}"
   ;;
   nfs3)
     supportdaemonization="true"
@@ -221,6 +222,7 @@ case ${COMMAND} in
     CLASS='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode'
     hadoop_debug "Appending HADOOP_SECONDARYNAMENODE_OPTS onto HADOOP_OPTS"
    HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_SECONDARYNAMENODE_OPTS}"
+    hadoop_add_param HADOOP_OPTS hdfs.audit.logger "-Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER}"
   ;;
   snapshotDiff)
     CLASS=org.apache.hadoop.hdfs.tools.snapshot.SnapshotDiff
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh
index bb17aa1db6..ed7b0420ae 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh
@@ -56,11 +56,12 @@ function hadoop_subproject_init
   HADOOP_IDENT_STRING="${HADOOP_HDFS_IDENT_STRING:-$HADOOP_IDENT_STRING}"
   HADOOP_HDFS_IDENT_STRING="${HADOOP_IDENT_STRING}"
 
+  # turn on the defaults
-
-  export HADOOP_NAMENODE_OPTS=${HADOOP_NAMENODE_OPTS:-"-Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender"}
-  export HADOOP_SECONDARYNAMENODE_OPTS=${HADOOP_SECONDARYNAMENODE_OPTS:-"-Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender"}
+  export HDFS_AUDIT_LOGGER=${HDFS_AUDIT_LOGGER:-INFO,NullAppender}
+  export HADOOP_NAMENODE_OPTS=${HADOOP_NAMENODE_OPTS:-"-Dhadoop.security.logger=INFO,RFAS"}
+  export HADOOP_SECONDARYNAMENODE_OPTS=${HADOOP_SECONDARYNAMENODE_OPTS:-"-Dhadoop.security.logger=INFO,RFAS"}
   export HADOOP_DATANODE_OPTS=${HADOOP_DATANODE_OPTS:-"-Dhadoop.security.logger=ERROR,RFAS"}
   export HADOOP_DN_SECURE_EXTRA_OPTS=${HADOOP_DN_SECURE_EXTRA_OPTS:-"-jvm server"}
   export HADOOP_NFS3_SECURE_EXTRA_OPTS=${HADOOP_NFS3_SECURE_EXTRA_OPTS:-"-jvm server"}
diff --git a/hadoop-mapreduce-project/conf/mapred-env.sh b/hadoop-mapreduce-project/conf/mapred-env.sh
index 6c417a3a0f..8a4b372932 100644
--- a/hadoop-mapreduce-project/conf/mapred-env.sh
+++ b/hadoop-mapreduce-project/conf/mapred-env.sh
@@ -29,9 +29,11 @@
 ###
 
 #Override the log4j settings for all MR apps
+# Java property: hadoop.root.logger
 # export MAPRED_ROOT_LOGGER="INFO,console"
 
 # Override Hadoop's log directory & file
+# Java property: hadoop.log.dir
 # export HADOOP_MAPRED_LOG_DIR=""
 
 # Override Hadoop's pid directory
@@ -39,6 +41,7 @@
 
 # Override Hadoop's identity string. $USER by default.
 # This is used in writing log and pid files, so keep that in mind!
+# Java property: hadoop.id.str
 # export HADOOP_MAPRED_IDENT_STRING=$USER
 
 # Override Hadoop's process priority
@@ -54,8 +57,8 @@
 
 # the value to 1000.
 # This value will be overridden by an Xmx setting specified in either
 # MAPRED_OPTS, HADOOP_OPTS, and/or HADOOP_JOB_HISTORYSERVER_OPTS.
-# If not specified, the default value will be picked from either YARN_HEAPMAX
-# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
+# If not specified, the default value will be picked from either HADOOP_HEAPSIZE
+# or the built-in default.
 #
 #export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000
@@ -65,6 +68,7 @@
 
 #export HADOOP_JOB_HISTORYSERVER_OPTS=
 
 # Specify the log4j settings for the JobHistoryServer
+# Java property: hadoop.root.logger
 #export HADOOP_JHS_LOGGER=INFO,RFA
 
diff --git a/hadoop-yarn-project/hadoop-yarn/conf/yarn-env.sh b/hadoop-yarn-project/hadoop-yarn/conf/yarn-env.sh
index 0f5e9b7c08..3d3a036d73 100644
--- a/hadoop-yarn-project/hadoop-yarn/conf/yarn-env.sh
+++ b/hadoop-yarn-project/hadoop-yarn/conf/yarn-env.sh
@@ -31,15 +31,18 @@
 # By default, YARN will use HADOOP_LOG_DIR for YARN logging. Specify a custom
 # log directory for YARN things here:
+# Java properties: hadoop.log.dir, yarn.log.dir
 # export YARN_LOG_DIR="${HADOOP_LOG_DIR}"
 
 # By default, YARN will use the value of HADOOP_LOGFILE as the 'fallback' log
 # file
 # when log4j settings are not defined. Specify a custom YARN log file
 # here:
+# Java properties: hadoop.log.file, yarn.log.file
 # export YARN_LOGFILE=${HADOOP_LOGFILE}
 
 #Override the log4j settings for all YARN apps By default, YARN will use
 # HADOOP_ROOT_LOGGER.
+# Java properties: hadoop.root.logger, yarn.root.logger
 # export YARN_ROOT_LOGGER=${HADOOP_ROOT_LOGGER}
 
 ###
@@ -51,8 +54,8 @@
 
 # the value to 1000.
 # This value will be overridden by an Xmx setting specified in either YARN_OPTS,
 # HADOOP_OPTS, and/or YARN_RESOURCEMANAGER_OPTS.
-# If not specified, the default value will be picked from either YARN_HEAPMAX
-# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
+# If not specified, the default value will be picked from either HADOOP_HEAPSIZE
+# or the built-in default.
 #
 #export YARN_RESOURCEMANAGER_HEAPSIZE=1000
@@ -85,8 +88,8 @@
 
 # the value to 1000.
 # This value will be overridden by an Xmx setting specified in either YARN_OPTS,
 # HADOOP_OPTS, and/or YARN_NODEMANAGER_OPTS.
-# If not specified, the default value will be picked from either YARN_HEAPMAX
-# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
+# If not specified, the default value will be picked from either HADOOP_HEAPSIZE
+# or the built-in default.
 #
 #export YARN_NODEMANAGER_HEAPSIZE=1000
@@ -107,8 +110,8 @@
 
 # the value to 1000.
 # This value will be overridden by an Xmx setting specified in either YARN_OPTS,
 # HADOOP_OPTS, and/or YARN_TIMELINESERVER_OPTS.
-# If not specified, the default value will be picked from either YARN_HEAPMAX
-# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
+# If not specified, the default value will be picked from either HADOOP_HEAPSIZE
+# or the built-in default.
 #
 #export YARN_TIMELINESERVER_HEAPSIZE=1000
@@ -129,8 +132,8 @@
 
 # the value to 1000.
 # This value will be overridden by an Xmx setting specified in either YARN_OPTS,
 # HADOOP_OPTS, and/or YARN_PROXYSERVER_OPTS.
-# If not specified, the default value will be picked from either YARN_HEAPMAX
-# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
+# If not specified, the default value will be picked from either HADOOP_HEAPSIZE
+# or the built-in default.
 #
 #export YARN_PROXYSERVER_HEAPSIZE=1000
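
The *-env.sh files touched above implement their "substitution and not append"
defaulting with plain shell parameter expansion, as hdfs-config.sh does. A
minimal sketch of the idea; the variable and values are illustrative, not part
of the patch:

    #!/usr/bin/env bash
    # ${VAR:-fallback} expands to the user's value if VAR is already set and
    # non-empty, otherwise to the fallback. Nothing is appended to a value
    # the user supplied.
    export HADOOP_NAMENODE_OPTS=${HADOOP_NAMENODE_OPTS:-"-Dhadoop.security.logger=INFO,RFAS"}
    echo "${HADOOP_NAMENODE_OPTS}"

A user who had already exported HADOOP_NAMENODE_OPTS="-Xmx4g" would see only
-Xmx4g echoed here; the security-logger default is substituted, never appended.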
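The NOTE block re-added to hadoop-env.sh describes defining a variable once and
reusing it across several daemon _OPTS settings. A short hadoop-env.sh sketch of
that pattern; the GC flags are the patch's own example, while the daemon values
around them are assumptions for illustration:

    # Define once...
    export HADOOP_GC_SETTINGS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"

    # ...then reuse in any daemon-specific _OPTS, e.g. as per the b option
    # under the namenode section (illustrative values):
    export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS ${HADOOP_GC_SETTINGS}"
    export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS ${HADOOP_GC_SETTINGS}"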
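The heap-size comments rewritten in mapred-env.sh and yarn-env.sh all state the
same precedence: an explicit -Xmx in an _OPTS variable beats the daemon-specific
HEAPSIZE, which beats HADOOP_HEAPSIZE, which beats the built-in default. A
hedged illustration of that ordering; the numbers are made up:

    # Sketch for hadoop-env.sh / yarn-env.sh; values are illustrative.
    export HADOOP_HEAPSIZE=2048                # site-wide fallback, in MB
    export YARN_RESOURCEMANAGER_HEAPSIZE=4096  # RM-specific; beats HADOOP_HEAPSIZE
    export YARN_NODEMANAGER_OPTS="-Xmx3g"      # explicit Xmx; beats both of the above
    # A daemon with none of these set falls back to the built-in default (1024 MB).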
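The bin/hdfs hunks inject -Dhdfs.audit.logger through hadoop_add_param rather
than plain string concatenation. hadoop_add_param is defined in Hadoop's shell
function library, not in this patch; the sketch below only approximates its
observable behavior (append unless the keyword is already present), which is
what lets a user-supplied -Dhdfs.audit.logger in HADOOP_OPTS win over the
HDFS_AUDIT_LOGGER default:

    #!/usr/bin/env bash
    # Approximation of hadoop_add_param's keyword-guarded append; NOT the
    # real implementation from Hadoop's shell library.
    function hadoop_add_param
    {
      # $1 = variable name, $2 = keyword to test for, $3 = text to append
      if [[ ! "${!1}" =~ $2 ]]; then
        eval "$1='${!1} $3'"
      fi
    }

    HADOOP_OPTS="-Dhdfs.audit.logger=WARN,RFAAUDIT"   # user already chose a logger
    HDFS_AUDIT_LOGGER=${HDFS_AUDIT_LOGGER:-INFO,NullAppender}
    hadoop_add_param HADOOP_OPTS hdfs.audit.logger "-Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER}"
    echo "${HADOOP_OPTS}"   # unchanged: the keyword was already present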