From a7c6c710b2366cea1b7c24e3a2cd46be1eb0f05b Mon Sep 17 00:00:00 2001 From: Allen Wittenauer Date: Wed, 10 Dec 2014 13:37:32 -0800 Subject: [PATCH] HADOOP-10950. rework heap management vars (John Smith via aw) --- .../hadoop-common/CHANGES.txt | 2 + .../hadoop-common/src/main/bin/hadoop | 2 - .../src/main/bin/hadoop-functions.sh | 36 ++++++++++----- .../hadoop-common/src/main/bin/rcc | 2 - .../hadoop-common/src/main/conf/hadoop-env.sh | 15 ++++++- .../conf/hadoop-user-functions.sh.example | 7 --- .../hadoop-hdfs/src/main/bin/hdfs | 1 - hadoop-mapreduce-project/bin/mapred | 3 +- hadoop-mapreduce-project/conf/mapred-env.sh | 17 +++---- hadoop-yarn-project/hadoop-yarn/bin/yarn | 14 +++--- .../hadoop-yarn/conf/yarn-env.sh | 44 +++++++------------ 11 files changed, 73 insertions(+), 70 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 6242ceefe4..7310dd41eb 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -11,6 +11,8 @@ Trunk (Unreleased) HADOOP-9902. Shell script rewrite (aw) + HADOOP-10950. rework heap management vars (John Smith via aw) + NEW FEATURES HADOOP-9629. Support Windows Azure Storage - Blob as a file system in Hadoop. 
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop b/hadoop-common-project/hadoop-common/src/main/bin/hadoop index 9df2c7dfa8..be38382ab9 100755 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop @@ -183,8 +183,6 @@ esac hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" -hadoop_add_param HADOOP_OPTS Xmx "${JAVA_HEAP_MAX}" - hadoop_finalize hadoop_java_exec "${COMMAND}" "${CLASS}" "$@" diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh index 2b56634a0f..3e353d92d3 100644 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh @@ -162,7 +162,6 @@ function hadoop_basic_init HADOOP_ROOT_LOGGER=${HADOOP_ROOT_LOGGER:-${HADOOP_LOGLEVEL},console} HADOOP_DAEMON_ROOT_LOGGER=${HADOOP_DAEMON_ROOT_LOGGER:-${HADOOP_LOGLEVEL},RFA} HADOOP_SECURITY_LOGGER=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender} - HADOOP_HEAPSIZE=${HADOOP_HEAPSIZE:-1024} HADOOP_SSH_OPTS=${HADOOP_SSH_OPTS:-"-o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=10s"} HADOOP_SECURE_LOG_DIR=${HADOOP_SECURE_LOG_DIR:-${HADOOP_LOG_DIR}} HADOOP_SECURE_PID_DIR=${HADOOP_SECURE_PID_DIR:-${HADOOP_PID_DIR}} @@ -597,15 +596,6 @@ function hadoop_java_setup hadoop_error "ERROR: $JAVA is not executable." 
exit 1 fi - # shellcheck disable=SC2034 - JAVA_HEAP_MAX=-Xmx1g - HADOOP_HEAPSIZE=${HADOOP_HEAPSIZE:-1024} - - # check envvars which might override default args - if [[ -n "$HADOOP_HEAPSIZE" ]]; then - # shellcheck disable=SC2034 - JAVA_HEAP_MAX="-Xmx${HADOOP_HEAPSIZE}m" - fi } function hadoop_finalize_libpaths @@ -617,6 +607,31 @@ function hadoop_finalize_libpaths fi } +function hadoop_finalize_hadoop_heap +{ + if [[ -n "${HADOOP_HEAPSIZE_MAX}" ]]; then + if [[ "${HADOOP_HEAPSIZE_MAX}" =~ ^[0-9]+$ ]]; then + HADOOP_HEAPSIZE_MAX="${HADOOP_HEAPSIZE_MAX}m" + fi + hadoop_add_param HADOOP_OPTS Xmx "-Xmx${HADOOP_HEAPSIZE_MAX}" + fi + + # backwards compatibility + if [[ -n "${HADOOP_HEAPSIZE}" ]]; then + if [[ "${HADOOP_HEAPSIZE}" =~ ^[0-9]+$ ]]; then + HADOOP_HEAPSIZE="${HADOOP_HEAPSIZE}m" + fi + hadoop_add_param HADOOP_OPTS Xmx "-Xmx${HADOOP_HEAPSIZE}" + fi + + if [[ -n "${HADOOP_HEAPSIZE_MIN}" ]]; then + if [[ "${HADOOP_HEAPSIZE_MIN}" =~ ^[0-9]+$ ]]; then + HADOOP_HEAPSIZE_MIN="${HADOOP_HEAPSIZE_MIN}m" + fi + hadoop_add_param HADOOP_OPTS Xms "-Xms${HADOOP_HEAPSIZE_MIN}" + fi +} + # # fill in any last minute options that might not have been defined yet # @@ -646,6 +661,7 @@ function hadoop_finalize # override of CONF dirs and more hadoop_finalize_classpath hadoop_finalize_libpaths + hadoop_finalize_hadoop_heap hadoop_finalize_hadoop_opts } diff --git a/hadoop-common-project/hadoop-common/src/main/bin/rcc b/hadoop-common-project/hadoop-common/src/main/bin/rcc index 74253539fb..512fc2cc52 100755 --- a/hadoop-common-project/hadoop-common/src/main/bin/rcc +++ b/hadoop-common-project/hadoop-common/src/main/bin/rcc @@ -37,7 +37,5 @@ CLASS='org.apache.hadoop.record.compiler.generated.Rcc' hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" -hadoop_add_param HADOOP_OPTS Xmx "${JAVA_HEAP_MAX}" - hadoop_finalize hadoop_java_exec rcc "${CLASS}" "$@" diff --git 
a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh index ed9382b345..588b02a2d3 100644 --- a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh +++ b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh @@ -64,8 +64,19 @@ # path. # export HADOOP_CONF_DIR=$HADOOP_PREFIX/etc/hadoop -# The maximum amount of heap to use, in MB. Default is 1024. -# export HADOOP_HEAPSIZE=1024 +# The maximum amount of heap to use (Java -Xmx). If no unit +# is provided, it will be converted to MB. Daemons will +# prefer any Xmx setting in their respective _OPTS variable. +# There is no default; the JVM will autoscale based upon machine +# memory size. +# export HADOOP_HEAPSIZE_MAX= + +# The minimum amount of heap to use (Java -Xms). If no unit +# is provided, it will be converted to MB. Daemons will +# prefer any Xms setting in their respective _OPTS variable. +# There is no default; the JVM will autoscale based upon machine +# memory size. +# export HADOOP_HEAPSIZE_MIN= # Extra Java runtime options for all Hadoop commands. We don't support # IPv6 yet/still, so by default the preference is set to IPv4. diff --git a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example index 7699c406ba..b2f78f8a41 100644 --- a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example +++ b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example @@ -84,11 +84,4 @@ # echo "ERROR: ${JAVA} is not executable." 
1>&2 # exit 1 # fi -# JAVA_HEAP_MAX=-Xmx1g -# HADOOP_HEAPSIZE=${HADOOP_HEAPSIZE:-128} -# -# # check envvars which might override default args -# if [[ -n "$HADOOP_HEAPSIZE" ]]; then -# JAVA_HEAP_MAX="-Xmx${HADOOP_HEAPSIZE}m" -# fi #} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs index ee666f3a25..8140f18b97 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs @@ -273,7 +273,6 @@ if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then fi fi -hadoop_add_param HADOOP_OPTS Xmx "${JAVA_HEAP_MAX}" hadoop_finalize if [[ -n "${supportdaemonization}" ]]; then diff --git a/hadoop-mapreduce-project/bin/mapred b/hadoop-mapreduce-project/bin/mapred index 667777a4c8..9f284718d1 100755 --- a/hadoop-mapreduce-project/bin/mapred +++ b/hadoop-mapreduce-project/bin/mapred @@ -95,7 +95,7 @@ case ${COMMAND} in hadoop_debug "Appending HADOOP_JOB_HISTORYSERVER_OPTS onto HADOOP_OPTS" HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_JOB_HISTORYSERVER_OPTS}" if [ -n "${HADOOP_JOB_HISTORYSERVER_HEAPSIZE}" ]; then - JAVA_HEAP_MAX="-Xmx${HADOOP_JOB_HISTORYSERVER_HEAPSIZE}m" + HADOOP_HEAPSIZE_MAX="${HADOOP_JOB_HISTORYSERVER_HEAPSIZE}" fi HADOOP_DAEMON_ROOT_LOGGER=${HADOOP_JHS_LOGGER:-$HADOOP_DAEMON_ROOT_LOGGER} ;; @@ -147,7 +147,6 @@ if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log" fi -hadoop_add_param HADOOP_OPTS Xmx "${JAVA_HEAP_MAX}" hadoop_finalize if [[ -n "${supportdaemonization}" ]]; then diff --git a/hadoop-mapreduce-project/conf/mapred-env.sh b/hadoop-mapreduce-project/conf/mapred-env.sh index 8a4b372932..4088cebf34 100644 --- a/hadoop-mapreduce-project/conf/mapred-env.sh +++ b/hadoop-mapreduce-project/conf/mapred-env.sh @@ -52,17 +52,14 @@ # Job History Server specific parameters ### -# Specify the max heapsize for the Job History Server using a numerical value -# in the scale of MB. 
For example, to specify an jvm option of -Xmx1000m, set -# the value to 1000. -# This value will be overridden by an Xmx setting specified in either -# MAPRED_OPTS, HADOOP_OPTS, and/or HADOOP_JOB_HISTORYSERVER_OPTS. -# If not specified, the default value will be picked from either HADOOP_HEAPSIZE -# or the built-in default. -# -#export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000 +# Specify the max heapsize for the JobHistoryServer. If no units are +# given, it will be assumed to be in MB. +# This value will be overridden by an Xmx setting specified in either YARN_OPTS, +# HADOOP_OPTS, and/or HADOOP_JOB_HISTORYSERVER_OPTS. +# Default is the same as HADOOP_HEAPSIZE_MAX. +#export HADOOP_JOB_HISTORYSERVER_HEAPSIZE= -# Specify the JVM options to be used when starting the ResourceManager. +# Specify the JVM options to be used when starting the HistoryServer. # These options will be appended to the options specified as YARN_OPTS # and therefore may override any similar flags set in YARN_OPTS #export HADOOP_JOB_HISTORYSERVER_OPTS= diff --git a/hadoop-yarn-project/hadoop-yarn/bin/yarn b/hadoop-yarn-project/hadoop-yarn/bin/yarn index dfa27e4166..760d8e631c 100644 --- a/hadoop-yarn-project/hadoop-yarn/bin/yarn +++ b/hadoop-yarn-project/hadoop-yarn/bin/yarn @@ -117,8 +117,9 @@ case "${COMMAND}" in CLASS='org.apache.hadoop.yarn.server.nodemanager.NodeManager' hadoop_debug "Append YARN_NODEMANAGER_OPTS onto YARN_OPTS" YARN_OPTS="${YARN_OPTS} ${YARN_NODEMANAGER_OPTS}" + # Backwards compatibility if [[ -n "${YARN_NODEMANAGER_HEAPSIZE}" ]]; then - JAVA_HEAP_MAX="-Xmx${YARN_NODEMANAGER_HEAPSIZE}m" + HADOOP_HEAPSIZE_MAX="${YARN_NODEMANAGER_HEAPSIZE}" fi ;; proxyserver) @@ -126,8 +127,9 @@ case "${COMMAND}" in CLASS='org.apache.hadoop.yarn.server.webproxy.WebAppProxyServer' hadoop_debug "Append YARN_PROXYSERVER_OPTS onto YARN_OPTS" YARN_OPTS="${YARN_OPTS} ${YARN_PROXYSERVER_OPTS}" + # Backwards compatibility if [[ -n "${YARN_PROXYSERVER_HEAPSIZE}" ]]; then - 
JAVA_HEAP_MAX="-Xmx${YARN_PROXYSERVER_HEAPSIZE}m" + HADOOP_HEAPSIZE_MAX="${YARN_PROXYSERVER_HEAPSIZE}" fi ;; queue) @@ -140,8 +142,9 @@ case "${COMMAND}" in CLASS='org.apache.hadoop.yarn.server.resourcemanager.ResourceManager' YARN_OPTS="${YARN_OPTS} ${YARN_RESOURCEMANAGER_OPTS}" hadoop_debug "Append YARN_RESOURCEMANAGER_OPTS onto YARN_OPTS" + # Backwards compatibility if [[ -n "${YARN_RESOURCEMANAGER_HEAPSIZE}" ]]; then - JAVA_HEAP_MAX="-Xmx${YARN_RESOURCEMANAGER_HEAPSIZE}m" + HADOOP_HEAPSIZE_MAX="${YARN_RESOURCEMANAGER_HEAPSIZE}" fi ;; rmadmin) @@ -154,8 +157,9 @@ case "${COMMAND}" in CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer' hadoop_debug "Append YARN_TIMELINESERVER_OPTS onto YARN_OPTS" YARN_OPTS="${YARN_OPTS} ${YARN_TIMELINESERVER_OPTS}" + # Backwards compatibility if [[ -n "${YARN_TIMELINESERVER_HEAPSIZE}" ]]; then - JAVA_HEAP_MAX="-Xmx${YARN_TIMELINESERVER_HEAPSIZE}m" + HADOOP_HEAPSIZE_MAX="${YARN_TIMELINESERVER_HEAPSIZE}" fi ;; sharedcachemanager) @@ -196,8 +200,6 @@ if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log" fi -hadoop_add_param HADOOP_OPTS Xmx "${JAVA_HEAP_MAX}" - # Add YARN custom options to comamnd line in case someone actaully # used these. # diff --git a/hadoop-yarn-project/hadoop-yarn/conf/yarn-env.sh b/hadoop-yarn-project/hadoop-yarn/conf/yarn-env.sh index 3d3a036d73..fdf601f378 100644 --- a/hadoop-yarn-project/hadoop-yarn/conf/yarn-env.sh +++ b/hadoop-yarn-project/hadoop-yarn/conf/yarn-env.sh @@ -49,15 +49,12 @@ # Resource Manager specific parameters ### -# Specify the max heapsize for the ResourceManager using a numerical value -# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set -# the value to 1000. +# Specify the max heapsize for the ResourceManager. If no units are +# given, it will be assumed to be in MB. 
# This value will be overridden by an Xmx setting specified in either YARN_OPTS, # HADOOP_OPTS, and/or YARN_RESOURCEMANAGER_OPTS. -# If not specified, the default value will be picked from either HADOOP_HEAPSIZE -# or the built-in default. -# -#export YARN_RESOURCEMANAGER_HEAPSIZE=1000 +# Default is the same as HADOOP_HEAPSIZE_MAX +#export YARN_RESOURCEMANAGER_HEAPSIZE= # Specify the JVM options to be used when starting the ResourceManager. # These options will be appended to the options specified as YARN_OPTS @@ -83,15 +80,12 @@ # Node Manager specific parameters ### -# Specify the max Heapsize for the NodeManager using a numerical value -# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set -# the value to 1000. +# Specify the max heapsize for the NodeManager. If no units are +# given, it will be assumed to be in MB. # This value will be overridden by an Xmx setting specified in either YARN_OPTS, # HADOOP_OPTS, and/or YARN_NODEMANAGER_OPTS. -# If not specified, the default value will be picked from either HADOOP_HEAPSIZE -# or the built-in default. -# -#export YARN_NODEMANAGER_HEAPSIZE=1000 +# Default is the same as HADOOP_HEAPSIZE_MAX. +#export YARN_NODEMANAGER_HEAPSIZE= # Specify the JVM options to be used when starting the NodeManager. # These options will be appended to the options specified as YARN_OPTS @@ -105,15 +99,12 @@ # TimeLineServer specifc parameters ### -# Specify the max Heapsize for the timeline server using a numerical value -# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set -# the value to 1000. +# Specify the max heapsize for the timelineserver. If no units are +# given, it will be assumed to be in MB. # This value will be overridden by an Xmx setting specified in either YARN_OPTS, # HADOOP_OPTS, and/or YARN_TIMELINESERVER_OPTS. -# If not specified, the default value will be picked from either HADOOP_HEAPSIZE -# or the built-in default. 
-# -#export YARN_TIMELINESERVER_HEAPSIZE=1000 +# Default is the same as HADOOP_HEAPSIZE_MAX. +#export YARN_TIMELINESERVER_HEAPSIZE= # Specify the JVM options to be used when starting the TimeLineServer. # These options will be appended to the options specified as YARN_OPTS @@ -127,15 +118,12 @@ # Web App Proxy Server specifc parameters ### -# Specify the max Heapsize for the proxy server using a numerical value -# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set -# the value to 1000. +# Specify the max heapsize for the web app proxy server. If no units are +# given, it will be assumed to be in MB. # This value will be overridden by an Xmx setting specified in either YARN_OPTS, # HADOOP_OPTS, and/or YARN_PROXYSERVER_OPTS. -# If not specified, the default value will be picked from either HADOOP_HEAPSIZE -# or the built-in default. -# -#export YARN_PROXYSERVER_HEAPSIZE=1000 +# Default is the same as HADOOP_HEAPSIZE_MAX. +#export YARN_PROXYSERVER_HEAPSIZE= # Specify the JVM options to be used when starting the proxy server. # These options will be appended to the options specified as YARN_OPTS