diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-dist.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-dist.xml
index 7128c75268..f019535320 100644
--- a/hadoop-assemblies/src/main/resources/assemblies/hadoop-dist.xml
+++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-dist.xml
@@ -29,6 +29,7 @@
        <include>*-config.cmd</include>
        <include>start-*.cmd</include>
        <include>stop-*.cmd</include>
+       <include>hadoop-layout.sh.example</include>
      </includes>
      <fileMode>0755</fileMode>
@@ -42,6 +43,8 @@
        <include>*-config.sh</include>
        <include>*-config.cmd</include>
+       <include>*-functions.sh</include>
+       <include>hadoop-layout.sh.example</include>
      </includes>
      <fileMode>0755</fileMode>
@@ -57,6 +60,10 @@
        <include>hadoop.cmd</include>
        <include>hdfs.cmd</include>
        <include>hadoop-config.cmd</include>
+       <include>hadoop-functions.sh</include>
+       <include>hadoop-layout.sh.example</include>
+       <include>hdfs-config.cmd</include>
+       <include>hdfs-config.sh</include>
      </includes>
      <fileMode>0755</fileMode>
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 8a94615808..fad1c77db7 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -9,6 +9,8 @@ Trunk (Unreleased)
HADOOP-10474 Move o.a.h.record to hadoop-streaming. (wheat9)
+ HADOOP-9902. Shell script rewrite (aw)
+
NEW FEATURES
HADOOP-10433. Key Management Server based on KeyProvider API. (tucu)
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop b/hadoop-common-project/hadoop-common/src/main/bin/hadoop
index b1e2018f61..24c4d18e82 100755
--- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop
+++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop
@@ -15,130 +15,164 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# This script runs the hadoop core commands.
-
-bin=`which $0`
-bin=`dirname ${bin}`
-bin=`cd "$bin" > /dev/null; pwd`
-
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
-
-function print_usage(){
+function hadoop_usage()
+{
echo "Usage: hadoop [--config confdir] COMMAND"
echo " where COMMAND is one of:"
- echo " fs run a generic filesystem user client"
- echo " version print the version"
- echo " jar run a jar file"
- echo " checknative [-a|-h] check native hadoop and compression libraries availability"
- echo " distcp copy file or directories recursively"
- echo " archive -archiveName NAME -p * create a hadoop archive"
+ echo " archive -archiveName NAME -p * "
+ echo " create a Hadoop archive"
+ echo " checknative [-a|-h] check native Hadoop and compression "
+ echo " libraries availability"
echo " classpath prints the class path needed to get the"
+ echo " Hadoop jar and the required libraries"
echo " credential interact with credential providers"
- echo " Hadoop jar and the required libraries"
echo " daemonlog get/set the log level for each daemon"
+ echo " distch path:owner:group:permisson"
+ echo " distributed metadata changer"
+ echo " distcp "
+ echo " copy file or directories recursively"
+ echo " fs run a generic filesystem user client"
+ echo " jar run a jar file"
+ echo " jnipath prints the java.library.path"
+ echo " key manage keys via the KeyProvider"
+ echo " version print the version"
echo " or"
echo " CLASSNAME run the class named CLASSNAME"
echo ""
echo "Most commands print help when invoked w/o parameters."
}
+
+# This script runs the hadoop core commands.
+
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+ DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+ this="${BASH_SOURCE-$0}"
+ bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+ DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
+ . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
+else
+ echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
+ exit 1
+fi
+
if [ $# = 0 ]; then
- print_usage
- exit
+ hadoop_exit_with_usage 1
fi
COMMAND=$1
-case $COMMAND in
- # usage flags
- --help|-help|-h)
- print_usage
- exit
- ;;
+shift
- #hdfs commands
- namenode|secondarynamenode|datanode|dfs|dfsadmin|fsck|balancer|fetchdt|oiv|dfsgroups|portmap|nfs3)
- echo "DEPRECATED: Use of this script to execute hdfs command is deprecated." 1>&2
- echo "Instead use the hdfs command for it." 1>&2
- echo "" 1>&2
- #try to locate hdfs and if present, delegate to it.
- shift
- if [ -f "${HADOOP_HDFS_HOME}"/bin/hdfs ]; then
- exec "${HADOOP_HDFS_HOME}"/bin/hdfs ${COMMAND/dfsgroups/groups} "$@"
- elif [ -f "${HADOOP_PREFIX}"/bin/hdfs ]; then
- exec "${HADOOP_PREFIX}"/bin/hdfs ${COMMAND/dfsgroups/groups} "$@"
+case ${COMMAND} in
+ balancer|datanode|dfs|dfsadmin|dfsgroups| \
+ namenode|secondarynamenode|fsck|fetchdt|oiv| \
+ portmap|nfs3)
+ hadoop_error "WARNING: Use of this script to execute ${COMMAND} is deprecated."
+ COMMAND=${COMMAND/dfsgroups/groups}
+ hadoop_error "WARNING: Attempting to execute replacement \"hdfs ${COMMAND}\" instead."
+ hadoop_error ""
+ #try to locate hdfs and if present, delegate to it.
+ if [[ -f "${HADOOP_HDFS_HOME}/bin/hdfs" ]]; then
+ # shellcheck disable=SC2086
+ exec "${HADOOP_HDFS_HOME}/bin/hdfs" \
+ --config "${HADOOP_CONF_DIR}" "${COMMAND}" "$@"
+ elif [[ -f "${HADOOP_PREFIX}/bin/hdfs" ]]; then
+ # shellcheck disable=SC2086
+ exec "${HADOOP_PREFIX}/bin/hdfs" \
+ --config "${HADOOP_CONF_DIR}" "${COMMAND}" "$@"
else
- echo "HADOOP_HDFS_HOME not found!"
+ hadoop_error "HADOOP_HDFS_HOME not found!"
exit 1
fi
- ;;
-
+ ;;
+
#mapred commands for backwards compatibility
pipes|job|queue|mrgroups|mradmin|jobtracker|tasktracker)
- echo "DEPRECATED: Use of this script to execute mapred command is deprecated." 1>&2
- echo "Instead use the mapred command for it." 1>&2
- echo "" 1>&2
+ hadoop_error "WARNING: Use of this script to execute ${COMMAND} is deprecated."
+ COMMAND=${COMMAND/mrgroups/groups}
+ hadoop_error "WARNING: Attempting to execute replacement \"mapred ${COMMAND}\" instead."
+ hadoop_error ""
#try to locate mapred and if present, delegate to it.
- shift
- if [ -f "${HADOOP_MAPRED_HOME}"/bin/mapred ]; then
- exec "${HADOOP_MAPRED_HOME}"/bin/mapred ${COMMAND/mrgroups/groups} "$@"
- elif [ -f "${HADOOP_PREFIX}"/bin/mapred ]; then
- exec "${HADOOP_PREFIX}"/bin/mapred ${COMMAND/mrgroups/groups} "$@"
+ if [[ -f "${HADOOP_MAPRED_HOME}/bin/mapred" ]]; then
+ exec "${HADOOP_MAPRED_HOME}/bin/mapred" \
+ --config "${HADOOP_CONF_DIR}" "${COMMAND}" "$@"
+ elif [[ -f "${HADOOP_PREFIX}/bin/mapred" ]]; then
+ exec "${HADOOP_PREFIX}/bin/mapred" \
+ --config "${HADOOP_CONF_DIR}" "${COMMAND}" "$@"
else
- echo "HADOOP_MAPRED_HOME not found!"
+ hadoop_error "HADOOP_MAPRED_HOME not found!"
exit 1
fi
- ;;
-
- #core commands
- *)
- # the core commands
- if [ "$COMMAND" = "fs" ] ; then
- CLASS=org.apache.hadoop.fs.FsShell
- elif [ "$COMMAND" = "version" ] ; then
- CLASS=org.apache.hadoop.util.VersionInfo
- elif [ "$COMMAND" = "jar" ] ; then
- CLASS=org.apache.hadoop.util.RunJar
- elif [ "$COMMAND" = "key" ] ; then
- CLASS=org.apache.hadoop.crypto.key.KeyShell
- elif [ "$COMMAND" = "checknative" ] ; then
- CLASS=org.apache.hadoop.util.NativeLibraryChecker
- elif [ "$COMMAND" = "distcp" ] ; then
- CLASS=org.apache.hadoop.tools.DistCp
- CLASSPATH=${CLASSPATH}:${TOOL_PATH}
- elif [ "$COMMAND" = "daemonlog" ] ; then
- CLASS=org.apache.hadoop.log.LogLevel
- elif [ "$COMMAND" = "archive" ] ; then
- CLASS=org.apache.hadoop.tools.HadoopArchives
- CLASSPATH=${CLASSPATH}:${TOOL_PATH}
- elif [ "$COMMAND" = "credential" ] ; then
- CLASS=org.apache.hadoop.security.alias.CredentialShell
- elif [ "$COMMAND" = "classpath" ] ; then
- if [ "$#" -eq 1 ]; then
- # No need to bother starting up a JVM for this simple case.
- echo $CLASSPATH
- exit
- else
- CLASS=org.apache.hadoop.util.Classpath
- fi
- elif [[ "$COMMAND" = -* ]] ; then
- # class and package names cannot begin with a -
- echo "Error: No command named \`$COMMAND' was found. Perhaps you meant \`hadoop ${COMMAND#-}'"
- exit 1
- else
- CLASS=$COMMAND
+ ;;
+ archive)
+ CLASS=org.apache.hadoop.tools.HadoopArchives
+ hadoop_add_classpath "${TOOL_PATH}"
+ ;;
+ checknative)
+ CLASS=org.apache.hadoop.util.NativeLibraryChecker
+ ;;
+ classpath)
+ if [[ "$#" -eq 1 ]]; then
+ CLASS=org.apache.hadoop.util.Classpath
+ else
+ hadoop_finalize
+ echo "${CLASSPATH}"
+ exit 0
fi
- shift
-
- # Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS
- HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-
- #make sure security appender is turned off
- HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}"
-
- export CLASSPATH=$CLASSPATH
- exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"
- ;;
-
+ ;;
+ credential)
+ CLASS=org.apache.hadoop.security.alias.CredentialShell
+ ;;
+ daemonlog)
+ CLASS=org.apache.hadoop.log.LogLevel
+ ;;
+ distch)
+ CLASS=org.apache.hadoop.tools.DistCh
+ hadoop_add_classpath "${TOOL_PATH}"
+ ;;
+ distcp)
+ CLASS=org.apache.hadoop.tools.DistCp
+ hadoop_add_classpath "${TOOL_PATH}"
+ ;;
+ fs)
+ CLASS=org.apache.hadoop.fs.FsShell
+ ;;
+ jar)
+ CLASS=org.apache.hadoop.util.RunJar
+ ;;
+ jnipath)
+ hadoop_finalize
+ echo "${JAVA_LIBRARY_PATH}"
+ exit 0
+ ;;
+ key)
+ CLASS=org.apache.hadoop.crypto.key.KeyShell
+ ;;
+ version)
+ CLASS=org.apache.hadoop.util.VersionInfo
+ ;;
+ -*|hdfs)
+ hadoop_exit_with_usage 1
+ ;;
+ *)
+ CLASS="${COMMAND}"
+ ;;
esac
+
+# Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS
+HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
+
+hadoop_add_param HADOOP_OPTS Xmx "${JAVA_HEAP_MAX}"
+
+hadoop_finalize
+export CLASSPATH
+hadoop_java_exec "${COMMAND}" "${CLASS}" "$@"
+
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh
index a0fb9d0c99..b2fc4d341d 100644
--- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh
+++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh
@@ -1,3 +1,5 @@
+#
+#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
@@ -13,280 +15,176 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+####
+# IMPORTANT
+####
+
+## hadoop-config.sh tends to get executed by non-Hadoop scripts.
+## Those scripts expect it to parse/manipulate $@. In order
+## to maintain backward compatibility, this means a surprising
+## lack of functions for bits that would be much better off in
+## a function.
+##
+## In other words, yes, there are some bad things happening here,
+## and unless we break the rest of the ecosystem, we can't change them. :(
+
+
# included in all the hadoop scripts with source command
# should not be executable directly
# also should not be passed any arguments, since we need original $*
-
-# Resolve links ($0 may be a softlink) and convert a relative path
-# to an absolute path. NB: The -P option requires bash built-ins
-# or POSIX:2001 compliant cd and pwd.
-
-# HADOOP_CLASSPATH Extra Java CLASSPATH entries.
-#
-# HADOOP_USER_CLASSPATH_FIRST When defined, the HADOOP_CLASSPATH is
-# added in the beginning of the global
-# classpath. Can be defined, for example,
-# by doing
-# export HADOOP_USER_CLASSPATH_FIRST=true
#
+# after doing more config, the caller should also call the finalize
+# function to finish last-minute/default configs for settings that
+# might be different between daemons & interactive use
-this="${BASH_SOURCE-$0}"
-common_bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P)
-script="$(basename -- "$this")"
-this="$common_bin/$script"
-
-[ -f "$common_bin/hadoop-layout.sh" ] && . "$common_bin/hadoop-layout.sh"
-
-HADOOP_COMMON_DIR=${HADOOP_COMMON_DIR:-"share/hadoop/common"}
-HADOOP_COMMON_LIB_JARS_DIR=${HADOOP_COMMON_LIB_JARS_DIR:-"share/hadoop/common/lib"}
-HADOOP_COMMON_LIB_NATIVE_DIR=${HADOOP_COMMON_LIB_NATIVE_DIR:-"lib/native"}
-HDFS_DIR=${HDFS_DIR:-"share/hadoop/hdfs"}
-HDFS_LIB_JARS_DIR=${HDFS_LIB_JARS_DIR:-"share/hadoop/hdfs/lib"}
-YARN_DIR=${YARN_DIR:-"share/hadoop/yarn"}
-YARN_LIB_JARS_DIR=${YARN_LIB_JARS_DIR:-"share/hadoop/yarn/lib"}
-MAPRED_DIR=${MAPRED_DIR:-"share/hadoop/mapreduce"}
-MAPRED_LIB_JARS_DIR=${MAPRED_LIB_JARS_DIR:-"share/hadoop/mapreduce/lib"}
-
-# the root of the Hadoop installation
-# See HADOOP-6255 for directory structure layout
-HADOOP_DEFAULT_PREFIX=$(cd -P -- "$common_bin"/.. && pwd -P)
-HADOOP_PREFIX=${HADOOP_PREFIX:-$HADOOP_DEFAULT_PREFIX}
-export HADOOP_PREFIX
-
-#check to see if the conf dir is given as an optional argument
-if [ $# -gt 1 ]
-then
- if [ "--config" = "$1" ]
- then
- shift
- confdir=$1
- if [ ! -d "$confdir" ]; then
- echo "Error: Cannot find configuration directory: $confdir"
- exit 1
- fi
- shift
- HADOOP_CONF_DIR=$confdir
- fi
+# you must be this high to ride the ride
+if [[ -z "${BASH_VERSINFO}" ]] || [[ "${BASH_VERSINFO}" -lt 3 ]]; then
+ echo "Hadoop requires bash v3 or better. Sorry."
+ exit 1
fi
-
-# Allow alternate conf dir location.
-if [ -e "${HADOOP_PREFIX}/conf/hadoop-env.sh" ]; then
- DEFAULT_CONF_DIR="conf"
+
+# In order to get partially bootstrapped, we need to figure out where
+# we are located. Chances are good that our caller has already done
+# this work for us, but just in case...
+
+if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then
+ _hadoop_common_this="${BASH_SOURCE-$0}"
+ HADOOP_LIBEXEC_DIR=$(cd -P -- "$(dirname -- "${_hadoop_common_this}")" >/dev/null && pwd -P)
+fi
+
+# get our functions defined for usage later
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-functions.sh" ]]; then
+ . "${HADOOP_LIBEXEC_DIR}/hadoop-functions.sh"
else
- DEFAULT_CONF_DIR="etc/hadoop"
-fi
-
-export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-$HADOOP_PREFIX/$DEFAULT_CONF_DIR}"
-
-if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
- . "${HADOOP_CONF_DIR}/hadoop-env.sh"
-fi
-
-# User can specify hostnames or a file where the hostnames are (not both)
-if [[ ( "$HADOOP_SLAVES" != '' ) && ( "$HADOOP_SLAVE_NAMES" != '' ) ]] ; then
- echo \
- "Error: Please specify one variable HADOOP_SLAVES or " \
- "HADOOP_SLAVE_NAME and not both."
+ echo "ERROR: Unable to exec ${HADOOP_LIBEXEC_DIR}/hadoop-functions.sh." 1>&2
exit 1
fi
-# Process command line options that specify hosts or file with host
-# list
-if [ $# -gt 1 ]
-then
- if [ "--hosts" = "$1" ]
- then
- shift
- export HADOOP_SLAVES="${HADOOP_CONF_DIR}/$1"
- shift
- elif [ "--hostnames" = "$1" ]
- then
- shift
- export HADOOP_SLAVE_NAMES=$1
- shift
- fi
+# allow overrides of the above and pre-defines of the below
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-layout.sh" ]]; then
+ . "${HADOOP_LIBEXEC_DIR}/hadoop-layout.sh"
fi
-# User can specify hostnames or a file where the hostnames are (not both)
-# (same check as above but now we know it's command line options that cause
-# the problem)
-if [[ ( "$HADOOP_SLAVES" != '' ) && ( "$HADOOP_SLAVE_NAMES" != '' ) ]] ; then
- echo \
- "Error: Please specify one of --hosts or --hostnames options and not both."
- exit 1
+#
+# IMPORTANT! We are not executing user provided code yet!
+#
+
+# Let's go! Base definitions so we can move forward
+hadoop_bootstrap_init
+
+# let's find our conf.
+#
+# first, check and process params passed to us
+# we process this in-line so that we can directly modify $@
+# if something downstream is processing that directly,
+# we need to make sure our params have been ripped out
+# note that we handle many of the options here on behalf of
+# various utilities. this forces a more consistent
+# user experience
+
+
+# save these off in case our caller needs them
+# shellcheck disable=SC2034
+HADOOP_USER_PARAMS="$@"
+
+HADOOP_DAEMON_MODE="default"
+
+while [[ -z "${_hadoop_common_done}" ]]; do
+ case $1 in
+ --buildpaths)
+ # shellcheck disable=SC2034
+ HADOOP_ENABLE_BUILD_PATHS=true
+ shift
+ ;;
+ --config)
+ shift
+ confdir=$1
+ shift
+ if [[ -d "${confdir}" ]]; then
+ # shellcheck disable=SC2034
+ YARN_CONF_DIR="${confdir}"
+ # shellcheck disable=SC2034
+ HADOOP_CONF_DIR="${confdir}"
+ elif [[ -z "${confdir}" ]]; then
+ hadoop_error "ERROR: No parameter provided for --config "
+ hadoop_exit_with_usage 1
+ else
+ hadoop_error "ERROR: Cannot find configuration directory \"${confdir}\""
+ hadoop_exit_with_usage 1
+ fi
+ ;;
+ --daemon)
+ shift
+ HADOOP_DAEMON_MODE=$1
+ shift
+ if [[ -z "${HADOOP_DAEMON_MODE}" || \
+ ! "${HADOOP_DAEMON_MODE}" =~ ^st(art|op|atus)$ ]]; then
+ hadoop_error "ERROR: --daemon must be followed by either \"start\", \"stop\", or \"status\"."
+ hadoop_exit_with_usage 1
+ fi
+ ;;
+ --help|-help|-h|help|--h|--\?|-\?|\?)
+ hadoop_exit_with_usage 0
+ ;;
+ --hostnames)
+ shift
+ # shellcheck disable=SC2034
+ HADOOP_SLAVE_NAMES="$1"
+ shift
+ ;;
+ --hosts)
+ shift
+ hadoop_populate_slaves_file "$1"
+ shift
+ ;;
+ *)
+ _hadoop_common_done=true
+ ;;
+ esac
+done
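+
+# An illustrative trace of the loop above (paths and daemon name are
+# hypothetical): given
+#
+#   hdfs --config /etc/hadoop-alt --daemon start namenode
+#
+# the --config and --daemon pairs are consumed here, leaving
+# "namenode" (and any remaining args) in $@ for the calling script
+# to dispatch on.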
+
+hadoop_find_confdir
+hadoop_exec_hadoopenv
+
+#
+# IMPORTANT! User provided code is now available!
+#
+
+# do all the OS-specific startup bits here
+# this allows us to get a decent JAVA_HOME,
+# call crle for LD_LIBRARY_PATH, etc.
+hadoop_os_tricks
+
+hadoop_java_setup
+
+hadoop_basic_init
+
+# inject any sub-project overrides, defaults, etc.
+if declare -F hadoop_subproject_init >/dev/null ; then
+ hadoop_subproject_init
fi
-# check if net.ipv6.bindv6only is set to 1
-bindv6only=$(/sbin/sysctl -n net.ipv6.bindv6only 2> /dev/null)
-if [ -n "$bindv6only" ] && [ "$bindv6only" -eq "1" ] && [ "$HADOOP_ALLOW_IPV6" != "yes" ]
-then
- echo "Error: \"net.ipv6.bindv6only\" is set to 1 - Java networking could be broken"
- echo "For more info: http://wiki.apache.org/hadoop/HadoopIPv6"
- exit 1
-fi
-
-# Newer versions of glibc use an arena memory allocator that causes virtual
-# memory usage to explode. This interacts badly with the many threads that
-# we use in Hadoop. Tune the variable down to prevent vmem explosion.
-export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-4}
-
-# Attempt to set JAVA_HOME if it is not set
-if [[ -z $JAVA_HOME ]]; then
- # On OSX use java_home (or /Library for older versions)
- if [ "Darwin" == "$(uname -s)" ]; then
- if [ -x /usr/libexec/java_home ]; then
- export JAVA_HOME=($(/usr/libexec/java_home))
- else
- export JAVA_HOME=(/Library/Java/Home)
- fi
- fi
-
- # Bail if we did not detect it
- if [[ -z $JAVA_HOME ]]; then
- echo "Error: JAVA_HOME is not set and could not be found." 1>&2
- exit 1
- fi
-fi
-
-JAVA=$JAVA_HOME/bin/java
-
-# check envvars which might override default args
-if [ "$HADOOP_HEAPSIZE" != "" ]; then
- #echo "run with heapsize $HADOOP_HEAPSIZE"
- JAVA_HEAP_MAX="-Xmx""$HADOOP_HEAPSIZE""m"
- #echo $JAVA_HEAP_MAX
-fi
-
-# CLASSPATH initially contains $HADOOP_CONF_DIR
-CLASSPATH="${HADOOP_CONF_DIR}"
-
-# so that filenames w/ spaces are handled correctly in loops below
-IFS=
-
-if [ "$HADOOP_COMMON_HOME" = "" ]; then
- if [ -d "${HADOOP_PREFIX}/$HADOOP_COMMON_DIR" ]; then
- export HADOOP_COMMON_HOME=$HADOOP_PREFIX
- fi
-fi
-
-# for releases, add core hadoop jar & webapps to CLASSPATH
-if [ -d "$HADOOP_COMMON_HOME/$HADOOP_COMMON_DIR/webapps" ]; then
- CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/$HADOOP_COMMON_DIR
-fi
-
-if [ -d "$HADOOP_COMMON_HOME/$HADOOP_COMMON_LIB_JARS_DIR" ]; then
- CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/$HADOOP_COMMON_LIB_JARS_DIR'/*'
-fi
-
-CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/$HADOOP_COMMON_DIR'/*'
-
-# default log directory & file
-if [ "$HADOOP_LOG_DIR" = "" ]; then
- HADOOP_LOG_DIR="$HADOOP_PREFIX/logs"
-fi
-if [ "$HADOOP_LOGFILE" = "" ]; then
- HADOOP_LOGFILE='hadoop.log'
-fi
-
-# default policy file for service-level authorization
-if [ "$HADOOP_POLICYFILE" = "" ]; then
- HADOOP_POLICYFILE="hadoop-policy.xml"
-fi
-
-# restore ordinary behaviour
-unset IFS
-
-# setup 'java.library.path' for native-hadoop code if necessary
-
-if [ -d "${HADOOP_PREFIX}/build/native" -o -d "${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR" ]; then
-
- if [ -d "${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR" ]; then
- if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
- JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR
- else
- JAVA_LIBRARY_PATH=${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR
- fi
- fi
-fi
-
-# setup a default TOOL_PATH
-TOOL_PATH="${TOOL_PATH:-$HADOOP_PREFIX/share/hadoop/tools/lib/*}"
-
-HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR"
-HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.file=$HADOOP_LOGFILE"
-HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.home.dir=$HADOOP_PREFIX"
-HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING"
-HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.root.logger=${HADOOP_ROOT_LOGGER:-INFO,console}"
-if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
- HADOOP_OPTS="$HADOOP_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
- export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$JAVA_LIBRARY_PATH
-fi
-HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.policy.file=$HADOOP_POLICYFILE"
-
-# Disable ipv6 as it can cause issues
-HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
-
-# put hdfs in classpath if present
-if [ "$HADOOP_HDFS_HOME" = "" ]; then
- if [ -d "${HADOOP_PREFIX}/$HDFS_DIR" ]; then
- export HADOOP_HDFS_HOME=$HADOOP_PREFIX
- fi
-fi
-
-if [ -d "$HADOOP_HDFS_HOME/$HDFS_DIR/webapps" ]; then
- CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/$HDFS_DIR
-fi
-
-if [ -d "$HADOOP_HDFS_HOME/$HDFS_LIB_JARS_DIR" ]; then
- CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/$HDFS_LIB_JARS_DIR'/*'
-fi
-
-CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/$HDFS_DIR'/*'
-
-# put yarn in classpath if present
-if [ "$HADOOP_YARN_HOME" = "" ]; then
- if [ -d "${HADOOP_PREFIX}/$YARN_DIR" ]; then
- export HADOOP_YARN_HOME=$HADOOP_PREFIX
- fi
-fi
-
-if [ -d "$HADOOP_YARN_HOME/$YARN_DIR/webapps" ]; then
- CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/$YARN_DIR
-fi
-
-if [ -d "$HADOOP_YARN_HOME/$YARN_LIB_JARS_DIR" ]; then
- CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/$YARN_LIB_JARS_DIR'/*'
-fi
-
-CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/$YARN_DIR'/*'
-
-# put mapred in classpath if present AND different from YARN
-if [ "$HADOOP_MAPRED_HOME" = "" ]; then
- if [ -d "${HADOOP_PREFIX}/$MAPRED_DIR" ]; then
- export HADOOP_MAPRED_HOME=$HADOOP_PREFIX
- fi
-fi
-
-if [ "$HADOOP_MAPRED_HOME/$MAPRED_DIR" != "$HADOOP_YARN_HOME/$YARN_DIR" ] ; then
- if [ -d "$HADOOP_MAPRED_HOME/$MAPRED_DIR/webapps" ]; then
- CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/$MAPRED_DIR
- fi
-
- if [ -d "$HADOOP_MAPRED_HOME/$MAPRED_LIB_JARS_DIR" ]; then
- CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/$MAPRED_LIB_JARS_DIR'/*'
- fi
-
- CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/$MAPRED_DIR'/*'
-fi
-
-# Add the user-specified CLASSPATH via HADOOP_CLASSPATH
-# Add it first or last depending on if user has
-# set env-var HADOOP_USER_CLASSPATH_FIRST
-if [ "$HADOOP_CLASSPATH" != "" ]; then
- # Prefix it if its to be preceded
- if [ "$HADOOP_USER_CLASSPATH_FIRST" != "" ]; then
- CLASSPATH=${HADOOP_CLASSPATH}:${CLASSPATH}
- else
- CLASSPATH=${CLASSPATH}:${HADOOP_CLASSPATH}
- fi
+# get the native libs in there pretty quick
+hadoop_add_javalibpath "${HADOOP_PREFIX}/build/native"
+hadoop_add_javalibpath "${HADOOP_PREFIX}/${HADOOP_COMMON_LIB_NATIVE_DIR}"
+
+# get the basic java class path for these subprojects
+# in as quickly as possible since other stuff
+# will definitely depend upon it.
+#
+# at some point, this will get replaced with something pluggable
+# so that these functions can sit in their projects rather than
+# common
+#
+for i in common hdfs yarn mapred
+do
+ hadoop_add_to_classpath_$i
+done
+
+#
+# backwards compatibility. new stuff should
+# call this when they are ready
+#
+if [[ -z "${HADOOP_NEW_CONFIG}" ]]; then
+ hadoop_finalize
fi
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemon.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemon.sh
index 6a4cd69152..b60915c1cd 100755
--- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemon.sh
+++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemon.sh
@@ -15,200 +15,42 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+function hadoop_usage
+{
+ echo "Usage: hadoop-daemon.sh [--config confdir] (start|stop|status) "
+}
-# Runs a Hadoop command as a daemon.
-#
-# Environment Variables
-#
-# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_PREFIX}/conf.
-# HADOOP_LOG_DIR Where log files are stored. PWD by default.
-# HADOOP_MASTER host:path where hadoop code should be rsync'd from
-# HADOOP_PID_DIR The pid files are stored. /tmp by default.
-# HADOOP_IDENT_STRING A string representing this instance of hadoop. $USER by default
-# HADOOP_NICENESS The scheduling priority for daemons. Defaults to 0.
-##
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+ DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+ this="${BASH_SOURCE-$0}"
+ bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+ DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
-usage="Usage: hadoop-daemon.sh [--config ] [--hosts hostlistfile] [--script script] (start|stop) "
-
-# if no args specified, show usage
-if [ $# -le 1 ]; then
- echo $usage
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
+ . "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
+else
+ echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
exit 1
fi
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
-
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
-
-# get arguments
-
-#default value
-hadoopScript="$HADOOP_PREFIX"/bin/hadoop
-if [ "--script" = "$1" ]
- then
- shift
- hadoopScript=$1
- shift
+if [[ $# = 0 ]]; then
+ hadoop_exit_with_usage 1
fi
-startStop=$1
-shift
-command=$1
+
+daemonmode=$1
shift
-hadoop_rotate_log ()
-{
- log=$1;
- num=5;
- if [ -n "$2" ]; then
- num=$2
- fi
- if [ -f "$log" ]; then # rotate logs
- while [ $num -gt 1 ]; do
- prev=`expr $num - 1`
- [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
- num=$prev
- done
- mv "$log" "$log.$num";
- fi
-}
-
-if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
- . "${HADOOP_CONF_DIR}/hadoop-env.sh"
+if [[ -z "${HADOOP_HDFS_HOME}" ]]; then
+ hdfsscript="${HADOOP_PREFIX}/bin/hdfs"
+else
+ hdfsscript="${HADOOP_HDFS_HOME}/bin/hdfs"
fi
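+
+# With the guts moved into hadoop-functions.sh, this script is now a
+# thin delegation shim. Illustrative example (daemon name hypothetical):
+#
+#   hadoop-daemon.sh start namenode
+#
+# becomes, via the exec below:
+#
+#   hdfs --config "${HADOOP_CONF_DIR}" --daemon start namenode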
-# Determine if we're starting a secure datanode, and if so, redefine appropriate variables
-if [ "$command" == "datanode" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then
- export HADOOP_PID_DIR=$HADOOP_SECURE_DN_PID_DIR
- export HADOOP_LOG_DIR=$HADOOP_SECURE_DN_LOG_DIR
- export HADOOP_IDENT_STRING=$HADOOP_SECURE_DN_USER
- starting_secure_dn="true"
-fi
-
-#Determine if we're starting a privileged NFS, if so, redefine the appropriate variables
-if [ "$command" == "nfs3" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_PRIVILEGED_NFS_USER" ]; then
- export HADOOP_PID_DIR=$HADOOP_PRIVILEGED_NFS_PID_DIR
- export HADOOP_LOG_DIR=$HADOOP_PRIVILEGED_NFS_LOG_DIR
- export HADOOP_IDENT_STRING=$HADOOP_PRIVILEGED_NFS_USER
- starting_privileged_nfs="true"
-fi
-
-if [ "$HADOOP_IDENT_STRING" = "" ]; then
- export HADOOP_IDENT_STRING="$USER"
-fi
-
-
-# get log directory
-if [ "$HADOOP_LOG_DIR" = "" ]; then
- export HADOOP_LOG_DIR="$HADOOP_PREFIX/logs"
-fi
-
-if [ ! -w "$HADOOP_LOG_DIR" ] ; then
- mkdir -p "$HADOOP_LOG_DIR"
- chown $HADOOP_IDENT_STRING $HADOOP_LOG_DIR
-fi
-
-if [ "$HADOOP_PID_DIR" = "" ]; then
- HADOOP_PID_DIR=/tmp
-fi
-
-# some variables
-export HADOOP_LOGFILE=hadoop-$HADOOP_IDENT_STRING-$command-$HOSTNAME.log
-export HADOOP_ROOT_LOGGER=${HADOOP_ROOT_LOGGER:-"INFO,RFA"}
-export HADOOP_SECURITY_LOGGER=${HADOOP_SECURITY_LOGGER:-"INFO,RFAS"}
-export HDFS_AUDIT_LOGGER=${HDFS_AUDIT_LOGGER:-"INFO,NullAppender"}
-log=$HADOOP_LOG_DIR/hadoop-$HADOOP_IDENT_STRING-$command-$HOSTNAME.out
-pid=$HADOOP_PID_DIR/hadoop-$HADOOP_IDENT_STRING-$command.pid
-HADOOP_STOP_TIMEOUT=${HADOOP_STOP_TIMEOUT:-5}
-
-# Set default scheduling priority
-if [ "$HADOOP_NICENESS" = "" ]; then
- export HADOOP_NICENESS=0
-fi
-
-case $startStop in
-
- (start)
-
- [ -w "$HADOOP_PID_DIR" ] || mkdir -p "$HADOOP_PID_DIR"
-
- if [ -f $pid ]; then
- if kill -0 `cat $pid` > /dev/null 2>&1; then
- echo $command running as process `cat $pid`. Stop it first.
- exit 1
- fi
- fi
-
- if [ "$HADOOP_MASTER" != "" ]; then
- echo rsync from $HADOOP_MASTER
- rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $HADOOP_MASTER/ "$HADOOP_PREFIX"
- fi
-
- hadoop_rotate_log $log
- echo starting $command, logging to $log
- cd "$HADOOP_PREFIX"
- case $command in
- namenode|secondarynamenode|datanode|journalnode|dfs|dfsadmin|fsck|balancer|zkfc)
- if [ -z "$HADOOP_HDFS_HOME" ]; then
- hdfsScript="$HADOOP_PREFIX"/bin/hdfs
- else
- hdfsScript="$HADOOP_HDFS_HOME"/bin/hdfs
- fi
- nohup nice -n $HADOOP_NICENESS $hdfsScript --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
- ;;
- (*)
- nohup nice -n $HADOOP_NICENESS $hadoopScript --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
- ;;
- esac
- echo $! > $pid
- sleep 1
- head "$log"
- # capture the ulimit output
- if [ "true" = "$starting_secure_dn" ]; then
- echo "ulimit -a for secure datanode user $HADOOP_SECURE_DN_USER" >> $log
- # capture the ulimit info for the appropriate user
- su --shell=/bin/bash $HADOOP_SECURE_DN_USER -c 'ulimit -a' >> $log 2>&1
- elif [ "true" = "$starting_privileged_nfs" ]; then
- echo "ulimit -a for privileged nfs user $HADOOP_PRIVILEGED_NFS_USER" >> $log
- su --shell=/bin/bash $HADOOP_PRIVILEGED_NFS_USER -c 'ulimit -a' >> $log 2>&1
- else
- echo "ulimit -a for user $USER" >> $log
- ulimit -a >> $log 2>&1
- fi
- sleep 3;
- if ! ps -p $! > /dev/null ; then
- exit 1
- fi
- ;;
-
- (stop)
-
- if [ -f $pid ]; then
- TARGET_PID=`cat $pid`
- if kill -0 $TARGET_PID > /dev/null 2>&1; then
- echo stopping $command
- kill $TARGET_PID
- sleep $HADOOP_STOP_TIMEOUT
- if kill -0 $TARGET_PID > /dev/null 2>&1; then
- echo "$command did not stop gracefully after $HADOOP_STOP_TIMEOUT seconds: killing with kill -9"
- kill -9 $TARGET_PID
- fi
- else
- echo no $command to stop
- fi
- rm -f $pid
- else
- echo no $command to stop
- fi
- ;;
-
- (*)
- echo $usage
- exit 1
- ;;
-
-esac
-
+exec "$hdfsscript" --config "${HADOOP_CONF_DIR}" --daemon "${daemonmode}" "$@"
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh
index 181d7ac101..db06612691 100755
--- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh
+++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh
@@ -18,19 +18,34 @@
# Run a Hadoop command on all slave hosts.
-usage="Usage: hadoop-daemons.sh [--config confdir] [--hosts hostlistfile] [start|stop] command args..."
+function hadoop_usage
+{
+ echo "Usage: hadoop-daemons.sh [--config confdir] [--hosts hostlistfile] (start|stop|status) "
+}
-# if no args specified, show usage
-if [ $# -le 1 ]; then
- echo $usage
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+ DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+ DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
+ . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
+else
+ echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
exit 1
fi
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
+if [[ $# = 0 ]]; then
+ hadoop_exit_with_usage 1
+fi
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
-
-exec "$bin/slaves.sh" --config $HADOOP_CONF_DIR cd "$HADOOP_PREFIX" \; "$bin/hadoop-daemon.sh" --config $HADOOP_CONF_DIR "$@"
+hadoop_connect_to_hosts "${bin}/hadoop-daemon.sh" \
+--config "${HADOOP_CONF_DIR}" "$@"
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh
new file mode 100644
index 0000000000..646c11ee3f
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh
@@ -0,0 +1,1036 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+function hadoop_error
+{
+ # NOTE: This function is not user replaceable.
+
+ echo "$*" 1>&2
+}
+
+function hadoop_bootstrap_init
+{
+ # NOTE: This function is not user replaceable.
+
+ # the root of the Hadoop installation
+ # See HADOOP-6255 for the expected directory structure layout
+
+ # By now, HADOOP_LIBEXEC_DIR should have been defined upstream
+ # We can piggyback off of that to figure out where the default
+ # HADOOP_PREFIX should be. This allows us to run without
+ # HADOOP_PREFIX ever being defined by a human! As a consequence
+ # HADOOP_LIBEXEC_DIR now becomes perhaps the single most powerful
+ # env var within Hadoop.
+ if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then
+ hadoop_error "HADOOP_LIBEXEC_DIR is not defined. Exiting."
+ exit 1
+ fi
+ HADOOP_DEFAULT_PREFIX=$(cd -P -- "${HADOOP_LIBEXEC_DIR}/.." >/dev/null && pwd -P)
+ HADOOP_PREFIX=${HADOOP_PREFIX:-$HADOOP_DEFAULT_PREFIX}
+ export HADOOP_PREFIX
+
+ #
+ # short-cuts. vendors may redefine these as well, preferably
+ # in hadoop-layout.sh
+ #
+ HADOOP_COMMON_DIR=${HADOOP_COMMON_DIR:-"share/hadoop/common"}
+ HADOOP_COMMON_LIB_JARS_DIR=${HADOOP_COMMON_LIB_JARS_DIR:-"share/hadoop/common/lib"}
+ HADOOP_COMMON_LIB_NATIVE_DIR=${HADOOP_COMMON_LIB_NATIVE_DIR:-"lib/native"}
+ HDFS_DIR=${HDFS_DIR:-"share/hadoop/hdfs"}
+ HDFS_LIB_JARS_DIR=${HDFS_LIB_JARS_DIR:-"share/hadoop/hdfs/lib"}
+ YARN_DIR=${YARN_DIR:-"share/hadoop/yarn"}
+ YARN_LIB_JARS_DIR=${YARN_LIB_JARS_DIR:-"share/hadoop/yarn/lib"}
+ MAPRED_DIR=${MAPRED_DIR:-"share/hadoop/mapreduce"}
+ MAPRED_LIB_JARS_DIR=${MAPRED_LIB_JARS_DIR:-"share/hadoop/mapreduce/lib"}
+ # setup a default TOOL_PATH
+ TOOL_PATH=${TOOL_PATH:-${HADOOP_PREFIX}/share/hadoop/tools/lib/*}
+
+ export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)}
+
+
+ # defaults
+ export HADOOP_OPTS=${HADOOP_OPTS:-"-Djava.net.preferIPv4Stack=true"}
+}
+
+function hadoop_find_confdir
+{
+ # NOTE: This function is not user replaceable.
+
+ # Look for the basic hadoop configuration area.
+ #
+ #
+ # An attempt at compatibility with some Hadoop 1.x
+ # installs.
+ if [[ -e "${HADOOP_PREFIX}/conf/hadoop-env.sh" ]]; then
+ DEFAULT_CONF_DIR="conf"
+ else
+ DEFAULT_CONF_DIR="etc/hadoop"
+ fi
+ export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-${HADOOP_PREFIX}/${DEFAULT_CONF_DIR}}"
+}
+
+function hadoop_exec_hadoopenv
+{
+ # NOTE: This function is not user replaceable.
+
+ if [[ -z "${HADOOP_ENV_PROCESSED}" ]]; then
+ if [[ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]]; then
+ export HADOOP_ENV_PROCESSED=true
+ . "${HADOOP_CONF_DIR}/hadoop-env.sh"
+ fi
+ fi
+}
+
+
+function hadoop_basic_init
+{
+ # Some of these are also set in hadoop-env.sh.
+ # we still set them here just in case hadoop-env.sh is
+ # broken in some way, set up defaults, etc.
+ #
+ # but it is important to note that if you update these
+ # you also need to update hadoop-env.sh as well!!!
+
+ # CLASSPATH initially contains $HADOOP_CONF_DIR
+ CLASSPATH="${HADOOP_CONF_DIR}"
+
+ if [[ -z "${HADOOP_COMMON_HOME}" ]] &&
+ [[ -d "${HADOOP_PREFIX}/${HADOOP_COMMON_DIR}" ]]; then
+ export HADOOP_COMMON_HOME="${HADOOP_PREFIX}"
+ fi
+
+ # default policy file for service-level authorization
+ HADOOP_POLICYFILE=${HADOOP_POLICYFILE:-"hadoop-policy.xml"}
+
+ # define HADOOP_HDFS_HOME
+ if [[ -z "${HADOOP_HDFS_HOME}" ]] &&
+ [[ -d "${HADOOP_PREFIX}/${HDFS_DIR}" ]]; then
+ export HADOOP_HDFS_HOME="${HADOOP_PREFIX}"
+ fi
+
+ # define HADOOP_YARN_HOME
+ if [[ -z "${HADOOP_YARN_HOME}" ]] &&
+ [[ -d "${HADOOP_PREFIX}/${YARN_DIR}" ]]; then
+ export HADOOP_YARN_HOME="${HADOOP_PREFIX}"
+ fi
+
+ # define HADOOP_MAPRED_HOME
+ if [[ -z "${HADOOP_MAPRED_HOME}" ]] &&
+ [[ -d "${HADOOP_PREFIX}/${MAPRED_DIR}" ]]; then
+ export HADOOP_MAPRED_HOME="${HADOOP_PREFIX}"
+ fi
+
+ HADOOP_IDENT_STRING=${HADOOP_IDENT_STRING:-$USER}
+ HADOOP_LOG_DIR=${HADOOP_LOG_DIR:-"${HADOOP_PREFIX}/logs"}
+ HADOOP_LOGFILE=${HADOOP_LOGFILE:-hadoop.log}
+ HADOOP_NICENESS=${HADOOP_NICENESS:-0}
+ HADOOP_STOP_TIMEOUT=${HADOOP_STOP_TIMEOUT:-5}
+ HADOOP_PID_DIR=${HADOOP_PID_DIR:-/tmp}
+ HADOOP_ROOT_LOGGER=${HADOOP_ROOT_LOGGER:-INFO,console}
+ HADOOP_DAEMON_ROOT_LOGGER=${HADOOP_DAEMON_ROOT_LOGGER:-INFO,RFA}
+ HADOOP_SECURITY_LOGGER=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}
+ HADOOP_HEAPSIZE=${HADOOP_HEAPSIZE:-1024}
+ HADOOP_SSH_OPTS=${HADOOP_SSH_OPTS:-"-o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=10s"}
+ HADOOP_SECURE_LOG_DIR=${HADOOP_SECURE_LOG_DIR:-${HADOOP_LOG_DIR}}
+ HADOOP_SECURE_PID_DIR=${HADOOP_SECURE_PID_DIR:-${HADOOP_PID_DIR}}
+ HADOOP_SSH_PARALLEL=${HADOOP_SSH_PARALLEL:-10}
+}
+
+function hadoop_populate_slaves_file()
+{
+ # NOTE: This function is not user replaceable.
+
+ local slavesfile=$1
+ shift
+ if [[ -f "${slavesfile}" ]]; then
+ # shellcheck disable=2034
+ HADOOP_SLAVES="${slavesfile}"
+ elif [[ -f "${HADOOP_CONF_DIR}/${slavesfile}" ]]; then
+ # shellcheck disable=2034
+ HADOOP_SLAVES="${HADOOP_CONF_DIR}/${slavesfile}"
+ # shellcheck disable=2034
+ YARN_SLAVES="${HADOOP_CONF_DIR}/${slavesfile}"
+ else
+ hadoop_error "ERROR: Cannot find hosts file \"${slavesfile}\""
+ hadoop_exit_with_usage 1
+ fi
+}
+
+function hadoop_rotate_log
+{
+ #
+ # log rotation (mainly used for .out files)
+ # Users are likely to replace this one for something
+ # that gzips or uses dates or who knows what.
+ #
+ # be aware that &1 and &2 might go through here
+ # so don't do anything too crazy...
+ #
+ local log=$1;
+ local num=${2:-5};
+
+ if [[ -f "${log}" ]]; then # rotate logs
+ while [[ ${num} -gt 1 ]]; do
+ #shellcheck disable=SC2086
+ let prev=${num}-1
+ if [[ -f "${log}.${prev}" ]]; then
+ mv "${log}.${prev}" "${log}.${num}"
+ fi
+ num=${prev}
+ done
+ mv "${log}" "${log}.${num}"
+ fi
+}
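+
+# Illustrative behavior (file names hypothetical): with foo.out and
+# foo.out.1 on disk, "hadoop_rotate_log foo.out" moves
+# foo.out.1 -> foo.out.2 and then foo.out -> foo.out.1, keeping at
+# most 5 generations unless a second argument says otherwise.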
+
+function hadoop_actual_ssh
+{
+ # we are passing this function to xargs
+ # should get hostname followed by rest of command line
+ local slave=$1
+ shift
+
+ # shellcheck disable=SC2086
+ ssh ${HADOOP_SSH_OPTS} ${slave} $"${@// /\\ }" 2>&1 | sed "s/^/$slave: /"
+}
+
+function hadoop_connect_to_hosts
+{
+ # shellcheck disable=SC2124
+ local params="$@"
+
+ #
+ # ssh (or whatever) to a host
+ #
+ # User can specify hostnames or a file where the hostnames are (not both)
+ if [[ -n "${HADOOP_SLAVES}" && -n "${HADOOP_SLAVE_NAMES}" ]] ; then
+ hadoop_error "ERROR: Both HADOOP_SLAVES and HADOOP_SLAVE_NAME were defined. Aborting."
+ exit 1
+ fi
+
+ if [[ -n "${HADOOP_SLAVE_NAMES}" ]] ; then
+ SLAVE_NAMES=${HADOOP_SLAVE_NAMES}
+ else
+ SLAVE_FILE=${HADOOP_SLAVES:-${HADOOP_CONF_DIR}/slaves}
+ fi
+
+ # if pdsh is available, let's use it. otherwise default
+ # to a loop around ssh. (ugh)
+ if [[ -e '/usr/bin/pdsh' ]]; then
+ if [[ -z "${HADOOP_SLAVE_NAMES}" ]] ; then
+ # if we were given a file, just let pdsh deal with it.
+ # shellcheck disable=SC2086
+ PDSH_SSH_ARGS_APPEND="${HADOOP_SSH_OPTS}" pdsh \
+ -f "${HADOOP_SSH_PARALLEL}" -w ^"${SLAVE_FILE}" $"${@// /\\ }" 2>&1
+ else
+ # no spaces allowed in the pdsh arg host list
+ # shellcheck disable=SC2086
+ SLAVE_NAMES=$(echo ${SLAVE_NAMES} | tr -s ' ' ,)
+ PDSH_SSH_ARGS_APPEND="${HADOOP_SSH_OPTS}" pdsh \
+ -f "${HADOOP_SSH_PARALLEL}" -w "${SLAVE_NAMES}" $"${@// /\\ }" 2>&1
+ fi
+ else
+ if [[ -z "${SLAVE_NAMES}" ]]; then
+ SLAVE_NAMES=$(sed 's/#.*$//;/^$/d' "${SLAVE_FILE}")
+ fi
+
+ # quoting here gets tricky. it's easier to push it into a function
+ # so that we don't have to deal with it. However...
+ # xargs can't use a function so instead we'll export it out
+ # and force it into a subshell
+ # moral of the story: just use pdsh.
+ export -f hadoop_actual_ssh
+ export HADOOP_SSH_OPTS
+ echo "${SLAVE_NAMES}" | \
+ xargs -n 1 -P"${HADOOP_SSH_PARALLEL}" \
+ -I {} bash -c -- "hadoop_actual_ssh {} ${params}"
+ wait
+ fi
+}
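+
+# A sketch of the call made by hadoop-daemons.sh (daemon name
+# illustrative): run a command on every host in ${HADOOP_SLAVES}
+# (or ${HADOOP_CONF_DIR}/slaves), using pdsh when installed and
+# falling back to ssh+xargs:
+#
+#   hadoop_connect_to_hosts "${bin}/hadoop-daemon.sh" \
+#     --config "${HADOOP_CONF_DIR}" start datanode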
+
+function hadoop_add_param
+{
+ #
+ # general param dedupe..
+ # $1 is what we are adding to
+ # $2 is the name of what we want to add (key)
+ # $3 is the key+value of what we're adding
+ #
+ # doing it this way allows us to support all sorts of
+ # different syntaxes, just so long as they are space
+ # delimited
+ #
+ if [[ ! ${!1} =~ $2 ]] ; then
+ # shellcheck disable=SC2086
+ eval $1="'${!1} $3'"
+ fi
+}
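+
+# Minimal usage sketch: append a JVM flag to HADOOP_OPTS only if no
+# existing option matches the key; repeat calls are no-ops:
+#
+#   hadoop_add_param HADOOP_OPTS java.net.preferIPv4Stack \
+#     "-Djava.net.preferIPv4Stack=true"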
+
+function hadoop_add_classpath
+{
+ # two params:
+ # $1 = directory, file, wildcard, whatever to add
+ # $2 = before or after, which determines where in the
+ # classpath this object should go. default is after
+ # return 0 = success
+ # return 1 = failure (duplicate, doesn't exist, whatever)
+
+ # However, with classpath (& JLP), we can do dedupe
+ # along with some sanity checking (e.g., missing directories)
+ # since we have a better idea of what is legal
+ #
+ # for wildcard at end, we can
+ # at least check the dir exists
+ if [[ $1 =~ ^.*\*$ ]]; then
+ local mp=$(dirname "$1")
+ if [[ ! -d "${mp}" ]]; then
+ return 1
+ fi
+
+ # no wildcard in the middle, so check existence
+ # (doesn't matter *what* it is)
+ elif [[ ! $1 =~ ^.*\*.*$ ]] && [[ ! -e "$1" ]]; then
+ return 1
+ fi
+
+ if [[ -z "${CLASSPATH}" ]]; then
+ CLASSPATH=$1
+ elif [[ ":${CLASSPATH}:" != *":$1:"* ]]; then
+ if [[ "$2" = "before" ]]; then
+ CLASSPATH="$1:${CLASSPATH}"
+ else
+ CLASSPATH+=:$1
+ fi
+ fi
+ return 0
+}
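+
+# Usage sketch, mirroring calls made elsewhere in this patch:
+#
+#   hadoop_add_classpath "${HADOOP_CONF_DIR}" before   # prepend
+#   hadoop_add_classpath "${TOOL_PATH}"                # append (default)
+#
+# Both are deduped against the current ${CLASSPATH} and return 1
+# if the target (or a wildcard's directory) does not exist.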
+
+function hadoop_add_colonpath
+{
+ # two params:
+ # $1 = directory, file, wildcard, whatever to add
+ # $2 = before or after, which determines where in the
+ # classpath this object should go
+ # return 0 = success
+ # return 1 = failure (duplicate)
+
+ # this is CLASSPATH, JLP, etc but with dedupe but no
+ # other checking
+ if [[ -d "${2}" ]] && [[ ":${!1}:" != *":$2:"* ]]; then
+ if [[ -z "${!1}" ]]; then
+ # shellcheck disable=SC2086
+ eval $1="'$2'"
+ elif [[ "$3" = "before" ]]; then
+ # shellcheck disable=SC2086
+ eval $1="'$2:${!1}'"
+ else
+ # shellcheck disable=SC2086
+ eval $1+="'$2'"
+ fi
+ fi
+}
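+
+# Usage sketch: $1 names the variable to modify (note the
+# indirection). For example, this is what hadoop_add_javalibpath
+# below expands to:
+#
+#   hadoop_add_colonpath JAVA_LIBRARY_PATH \
+#     "${HADOOP_PREFIX}/${HADOOP_COMMON_LIB_NATIVE_DIR}"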
+
+function hadoop_add_javalibpath
+{
+ # specialized function for a common use case
+ hadoop_add_colonpath JAVA_LIBRARY_PATH "$1" "$2"
+}
+
+function hadoop_add_ldlibpath
+{
+ # specialized function for a common use case
+ hadoop_add_colonpath LD_LIBRARY_PATH "$1" "$2"
+
+ # note that we export this
+ export LD_LIBRARY_PATH
+}
+
+function hadoop_add_to_classpath_common
+{
+
+ #
+ # get all of the common jars+config in the path
+ #
+
+ # developers
+ if [[ -n "${HADOOP_ENABLE_BUILD_PATHS}" ]]; then
+ hadoop_add_classpath "${HADOOP_COMMON_HOME}/hadoop-common/target/classes"
+ fi
+
+ if [[ -d "${HADOOP_COMMON_HOME}/${HADOOP_COMMON_DIR}/webapps" ]]; then
+ hadoop_add_classpath "${HADOOP_COMMON_HOME}/${HADOOP_COMMON_DIR}"
+ fi
+
+ hadoop_add_classpath "${HADOOP_COMMON_HOME}/${HADOOP_COMMON_LIB_JARS_DIR}"'/*'
+ hadoop_add_classpath "${HADOOP_COMMON_HOME}/${HADOOP_COMMON_DIR}"'/*'
+}
+
+function hadoop_add_to_classpath_hdfs
+{
+ #
+ # get all of the hdfs jars+config in the path
+ #
+ # developers
+ if [[ -n "${HADOOP_ENABLE_BUILD_PATHS}" ]]; then
+ hadoop_add_classpath "${HADOOP_HDFS_HOME}/hadoop-hdfs/target/classes"
+ fi
+
+ # put hdfs in classpath if present
+ if [[ -d "${HADOOP_HDFS_HOME}/${HDFS_DIR}/webapps" ]]; then
+ hadoop_add_classpath "${HADOOP_HDFS_HOME}/${HDFS_DIR}"
+ fi
+
+ hadoop_add_classpath "${HADOOP_HDFS_HOME}/${HDFS_LIB_JARS_DIR}"'/*'
+ hadoop_add_classpath "${HADOOP_HDFS_HOME}/${HDFS_DIR}"'/*'
+}
+
+function hadoop_add_to_classpath_yarn
+{
+ #
+ # get all of the yarn jars+config in the path
+ #
+ # developers
+ if [[ -n "${HADOOP_ENABLE_BUILD_PATHS}" ]]; then
+ for i in yarn-api yarn-common yarn-mapreduce yarn-master-worker \
+ yarn-server/yarn-server-nodemanager \
+ yarn-server/yarn-server-common \
+ yarn-server/yarn-server-resourcemanager; do
+ hadoop_add_classpath "${HADOOP_YARN_HOME}/$i/target/classes"
+ done
+
+ hadoop_add_classpath "${HADOOP_YARN_HOME}/build/test/classes"
+ hadoop_add_classpath "${HADOOP_YARN_HOME}/build/tools"
+ fi
+
+ if [[ -d "${HADOOP_YARN_HOME}/${YARN_DIR}/webapps" ]]; then
+ hadoop_add_classpath "${HADOOP_YARN_HOME}/${YARN_DIR}"
+ fi
+
+ hadoop_add_classpath "${HADOOP_YARN_HOME}/${YARN_LIB_JARS_DIR}"'/*'
+ hadoop_add_classpath "${HADOOP_YARN_HOME}/${YARN_DIR}"'/*'
+}
+
+function hadoop_add_to_classpath_mapred
+{
+ #
+ # get all of the mapreduce jars+config in the path
+ #
+ # developers
+ if [[ -n "${HADOOP_ENABLE_BUILD_PATHS}" ]]; then
+ hadoop_add_classpath "${HADOOP_MAPRED_HOME}/hadoop-mapreduce-client-shuffle/target/classes"
+ hadoop_add_classpath "${HADOOP_MAPRED_HOME}/hadoop-mapreduce-client-common/target/classes"
+ hadoop_add_classpath "${HADOOP_MAPRED_HOME}/hadoop-mapreduce-client-hs/target/classes"
+ hadoop_add_classpath "${HADOOP_MAPRED_HOME}/hadoop-mapreduce-client-hs-plugins/target/classes"
+ hadoop_add_classpath "${HADOOP_MAPRED_HOME}/hadoop-mapreduce-client-app/target/classes"
+ hadoop_add_classpath "${HADOOP_MAPRED_HOME}/hadoop-mapreduce-client-jobclient/target/classes"
+ hadoop_add_classpath "${HADOOP_MAPRED_HOME}/hadoop-mapreduce-client-core/target/classes"
+ fi
+
+ if [[ -d "${HADOOP_MAPRED_HOME}/${MAPRED_DIR}/webapps" ]]; then
+ hadoop_add_classpath "${HADOOP_MAPRED_HOME}/${MAPRED_DIR}"
+ fi
+
+ hadoop_add_classpath "${HADOOP_MAPRED_HOME}/${MAPRED_LIB_JARS_DIR}"'/*'
+ hadoop_add_classpath "${HADOOP_MAPRED_HOME}/${MAPRED_DIR}"'/*'
+}
+
+
+function hadoop_add_to_classpath_userpath
+{
+ # Add the user-specified HADOOP_CLASSPATH to the
+ # official CLASSPATH env var.
+ # Add it first or last depending on if user has
+ # set env-var HADOOP_USER_CLASSPATH_FIRST
+ # we'll also dedupe it, because we're cool like that.
+ #
+ local c
+ local array
+ local i
+ local j
+ let c=0
+
+ if [[ -n "${HADOOP_CLASSPATH}" ]]; then
+ # I wonder if Java runs on VMS.
+ for i in $(echo "${HADOOP_CLASSPATH}" | tr : '\n'); do
+ array[$c]=$i
+ let c+=1
+ done
+ let j=c-1
+
+ if [[ -z "${HADOOP_USER_CLASSPATH_FIRST}" ]]; then
+ for ((i=j; i>=0; i--)); do
+ hadoop_add_classpath "${array[$i]}" before
+ done
+ else
+ for ((i=0; i<=j; i++)); do
+ hadoop_add_classpath "${array[$i]}" after
+ done
+ fi
+ fi
+}
+
+function hadoop_os_tricks
+{
+ local bindv6only
+
+ # some OSes have special needs. here's some out of the box
+ # examples for OS X and Linux. Vendors, replace this with your special sauce.
+ case ${HADOOP_OS_TYPE} in
+ Darwin)
+ if [[ -x /usr/libexec/java_home ]]; then
+ export JAVA_HOME="$(/usr/libexec/java_home)"
+ else
+ export JAVA_HOME=/Library/Java/Home
+ fi
+ ;;
+ Linux)
+ bindv6only=$(/sbin/sysctl -n net.ipv6.bindv6only 2> /dev/null)
+
+ # NOTE! HADOOP_ALLOW_IPV6 is a developer hook. We leave it
+ # undocumented in hadoop-env.sh because we don't want users to
+ # shoot themselves in the foot while devs make IPv6 work.
+ if [[ -n "${bindv6only}" ]] &&
+ [[ "${bindv6only}" -eq "1" ]] &&
+ [[ "${HADOOP_ALLOW_IPV6}" != "yes" ]]; then
+ hadoop_error "ERROR: \"net.ipv6.bindv6only\" is set to 1 "
+ hadoop_error "ERROR: Hadoop networking could be broken. Aborting."
+ hadoop_error "ERROR: For more info: http://wiki.apache.org/hadoop/HadoopIPv6"
+ exit 1
+ fi
+ # Newer versions of glibc use an arena memory allocator that
+ # causes virtual memory usage to explode. This interacts badly
+ # with the many threads that we use in Hadoop. Tune the variable
+ # down to prevent vmem explosion.
+ export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-4}
+ ;;
+ esac
+}
+
+function hadoop_java_setup
+{
+ # Bail if we did not detect it
+ if [[ -z "${JAVA_HOME}" ]]; then
+ hadoop_error "ERROR: JAVA_HOME is not set and could not be found."
+ exit 1
+ fi
+
+ if [[ ! -d "${JAVA_HOME}" ]]; then
+ hadoop_error "ERROR: JAVA_HOME ${JAVA_HOME} does not exist."
+ exit 1
+ fi
+
+ JAVA="${JAVA_HOME}/bin/java"
+
+ if [[ ! -x "$JAVA" ]]; then
+ hadoop_error "ERROR: $JAVA is not executable."
+ exit 1
+ fi
+ # shellcheck disable=SC2034
+ JAVA_HEAP_MAX=-Xmx1g
+ HADOOP_HEAPSIZE=${HADOOP_HEAPSIZE:-1024}
+
+ # check envvars which might override default args
+ if [[ -n "$HADOOP_HEAPSIZE" ]]; then
+ # shellcheck disable=SC2034
+ JAVA_HEAP_MAX="-Xmx${HADOOP_HEAPSIZE}m"
+ fi
+}
+
+
+function hadoop_finalize_libpaths
+{
+ if [[ -n "${JAVA_LIBRARY_PATH}" ]]; then
+ hadoop_add_param HADOOP_OPTS java.library.path \
+ "-Djava.library.path=${JAVA_LIBRARY_PATH}"
+ export LD_LIBRARY_PATH
+ fi
+}
+
+#
+# fill in any last minute options that might not have been defined yet
+#
+# Note that we are replacing ' ' with '\ ' so that directories with
+# spaces work correctly when run exec blah
+#
+function hadoop_finalize_hadoop_opts
+{
+ hadoop_add_param HADOOP_OPTS hadoop.log.dir "-Dhadoop.log.dir=${HADOOP_LOG_DIR/ /\ }"
+ hadoop_add_param HADOOP_OPTS hadoop.log.file "-Dhadoop.log.file=${HADOOP_LOGFILE/ /\ }"
+ hadoop_add_param HADOOP_OPTS hadoop.home.dir "-Dhadoop.home.dir=${HADOOP_PREFIX/ /\ }"
+ hadoop_add_param HADOOP_OPTS hadoop.id.str "-Dhadoop.id.str=${HADOOP_IDENT_STRING/ /\ }"
+ hadoop_add_param HADOOP_OPTS hadoop.root.logger "-Dhadoop.root.logger=${HADOOP_ROOT_LOGGER}"
+ hadoop_add_param HADOOP_OPTS hadoop.policy.file "-Dhadoop.policy.file=${HADOOP_POLICYFILE/ /\ }"
+ hadoop_add_param HADOOP_OPTS hadoop.security.logger "-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER}"
+}
+
+function hadoop_finalize_classpath
+{
+
+ # we want the HADOOP_CONF_DIR at the end
+ # according to oom, it gives a 2% perf boost
+ hadoop_add_classpath "${HADOOP_CONF_DIR}" after
+
+ # user classpath gets added at the last minute. this allows
+ # override of CONF dirs and more
+ hadoop_add_to_classpath_userpath
+}
+
+function hadoop_finalize
+{
+ # user classpath gets added at the last minute. this allows
+ # override of CONF dirs and more
+ hadoop_finalize_classpath
+ hadoop_finalize_libpaths
+ hadoop_finalize_hadoop_opts
+}
+
+function hadoop_exit_with_usage
+{
+ # NOTE: This function is not user replaceable.
+
+ local exitcode=$1
+ if [[ -z $exitcode ]]; then
+ exitcode=1
+ fi
+ if declare -F hadoop_usage >/dev/null ; then
+ hadoop_usage
+ elif [[ -x /usr/bin/cowsay ]]; then
+ /usr/bin/cowsay -f elephant "Sorry, no help available."
+ else
+ hadoop_error "Sorry, no help available."
+ fi
+ exit $exitcode
+}
+
+function hadoop_verify_secure_prereq
+{
+ # if you are on an OS like Illumos that has functional roles
+ # and you are using pfexec, you'll probably want to change
+ # this.
+
+ # ${EUID} comes from the shell itself!
+ if [[ "${EUID}" -ne 0 ]] || [[ -n "${HADOOP_SECURE_COMMAND}" ]]; then
+ hadoop_error "ERROR: You must be a privileged in order to run a secure serice."
+ return 1
+ else
+ return 0
+ fi
+}
+
+function hadoop_setup_secure_service
+{
+ # need a more complicated setup? replace me!
+
+ HADOOP_PID_DIR=${HADOOP_SECURE_PID_DIR}
+ HADOOP_LOG_DIR=${HADOOP_SECURE_LOG_DIR}
+}
+
+function hadoop_verify_piddir
+{
+ if [[ -z "${HADOOP_PID_DIR}" ]]; then
+ hadoop_error "No pid directory defined."
+ exit 1
+ fi
+ if [[ ! -w "${HADOOP_PID_DIR}" ]] && [[ ! -d "${HADOOP_PID_DIR}" ]]; then
+ hadoop_error "WARNING: ${HADOOP_PID_DIR} does not exist. Creating."
+ mkdir -p "${HADOOP_PID_DIR}" > /dev/null 2>&1
+ if [[ $? -gt 0 ]]; then
+ hadoop_error "ERROR: Unable to create ${HADOOP_PID_DIR}. Aborting."
+ exit 1
+ fi
+ fi
+ touch "${HADOOP_PID_DIR}/$$" >/dev/null 2>&1
+ if [[ $? -gt 0 ]]; then
+ hadoop_error "ERROR: Unable to write in ${HADOOP_PID_DIR}. Aborting."
+ exit 1
+ fi
+ rm "${HADOOP_PID_DIR}/$$" >/dev/null 2>&1
+}
+
+function hadoop_verify_logdir
+{
+ if [[ -z "${HADOOP_LOG_DIR}" ]]; then
+ hadoop_error "No log directory defined."
+ exit 1
+ fi
+ if [[ ! -w "${HADOOP_LOG_DIR}" ]] && [[ ! -d "${HADOOP_LOG_DIR}" ]]; then
+ hadoop_error "WARNING: ${HADOOP_LOG_DIR} does not exist. Creating."
+ mkdir -p "${HADOOP_LOG_DIR}" > /dev/null 2>&1
+ if [[ $? -gt 0 ]]; then
+ hadoop_error "ERROR: Unable to create ${HADOOP_LOG_DIR}. Aborting."
+ exit 1
+ fi
+ fi
+ touch "${HADOOP_LOG_DIR}/$$" >/dev/null 2>&1
+ if [[ $? -gt 0 ]]; then
+ hadoop_error "ERROR: Unable to write in ${HADOOP_LOG_DIR}. Aborting."
+ exit 1
+ fi
+ rm "${HADOOP_LOG_DIR}/$$" >/dev/null 2>&1
+}
+
+function hadoop_status_daemon() {
+ #
+ # LSB 4.1.0 compatible status command (1)
+ #
+ # 0 = program is running
+ # 1 = dead, but still a pid (2)
+ # 2 = (not used by us)
+ # 3 = not running
+ #
+ # 1 - this is not an endorsement of the LSB
+ #
+ # 2 - technically, the specification says /var/run/pid, so
+ # we should never return this value, but we're giving
+ # them the benefit of a doubt and returning 1 even if
+ # our pid is not in in /var/run .
+ #
+
+ local pidfile=$1
+ shift
+
+ local pid
+
+ if [[ -f "${pidfile}" ]]; then
+ pid=$(cat "${pidfile}")
+ if ps -p "${pid}" > /dev/null 2>&1; then
+ return 0
+ fi
+ return 1
+ fi
+ return 3
+}
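+
+# Example check (pid file path illustrative):
+#
+#   hadoop_status_daemon "${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-namenode.pid"
+#   case $? in
+#     0) echo "running" ;;
+#     1) echo "dead, but pid file exists" ;;
+#     3) echo "not running" ;;
+#   esac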
+
+function hadoop_java_exec
+{
+ # run a java command. this is used for
+ # non-daemons
+
+ local command=$1
+ local class=$2
+ shift 2
+ # we eval this so that paths with spaces work
+ #shellcheck disable=SC2086
+ eval exec "$JAVA" "-Dproc_${command}" ${HADOOP_OPTS} "${class}" "$@"
+
+}
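+
+# For example, the hadoop front-end ends with (COMMAND and CLASS set
+# by its case statement):
+#
+#   hadoop_java_exec "${COMMAND}" "${CLASS}" "$@"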
+
+function hadoop_start_daemon
+{
+ # this is our non-privileged daemon starter
+ # that fires up a daemon in the *foreground*
+ # so complex! so wow! much java!
+ local command=$1
+ local class=$2
+ shift 2
+ #shellcheck disable=SC2086
+ eval exec "$JAVA" "-Dproc_${command}" ${HADOOP_OPTS} "${class}" "$@"
+}
+
+function hadoop_start_daemon_wrapper
+{
+ # this is our non-privileged daemon start
+ # that fires up a daemon in the *background*
+ local daemonname=$1
+ local class=$2
+ local pidfile=$3
+ local outfile=$4
+ shift 4
+
+ hadoop_rotate_log "${outfile}"
+
+ hadoop_start_daemon "${daemonname}" \
+ "$class" "$@" >> "${outfile}" 2>&1 < /dev/null &
+ #shellcheck disable=SC2086
+ echo $! > "${pidfile}" 2>/dev/null
+ if [[ $? -gt 0 ]]; then
+ hadoop_error "ERROR: Cannot write pid ${pidfile}."
+ fi
+
+ # shellcheck disable=SC2086
+ renice "${HADOOP_NICENESS}" $! >/dev/null 2>&1
+ if [[ $? -gt 0 ]]; then
+ hadoop_error "ERROR: Cannot set priority of process $!"
+ fi
+
+ # shellcheck disable=SC2086
+ disown $! 2>&1
+ if [[ $? -gt 0 ]]; then
+ hadoop_error "ERROR: Cannot disconnect process $!"
+ fi
+ sleep 1
+
+ # capture the ulimit output
+ ulimit -a >> "${outfile}" 2>&1
+
+ # shellcheck disable=SC2086
+ if ! ps -p $! >/dev/null 2>&1; then
+ return 1
+ fi
+ return 0
+}
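+
+# Illustrative background start (class and file names are examples,
+# not fixed by this function):
+#
+#   hadoop_start_daemon_wrapper datanode \
+#     org.apache.hadoop.hdfs.server.datanode.DataNode \
+#     "${HADOOP_PID_DIR}/hadoop-datanode.pid" \
+#     "${HADOOP_LOG_DIR}/hadoop-datanode.out"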
+
+function hadoop_start_secure_daemon
+{
+ # this is used to launch a secure daemon in the *foreground*
+ #
+ local daemonname=$1
+ local class=$2
+
+ # pid file to create for our deamon
+ local daemonpidfile=$3
+
+ # where to send stdout. jsvc has bad habits so this *may* be &1
+ # which means you send it to stdout!
+ local daemonoutfile=$4
+
+ # where to send stderr. same thing, except &2 = stderr
+ local daemonerrfile=$5
+ shift 5
+
+
+
+ hadoop_rotate_log "${daemonoutfile}"
+ hadoop_rotate_log "${daemonerrfile}"
+
+ jsvc="${JSVC_HOME}/jsvc"
+ if [[ ! -f "${jsvc}" ]]; then
+ hadoop_error "JSVC_HOME is not set or set incorrectly. jsvc is required to run secure"
+ hadoop_error "or privileged daemons. Please download and install jsvc from "
+ hadoop_error "http://archive.apache.org/dist/commons/daemon/binaries/ "
+ hadoop_error "and set JSVC_HOME to the directory containing the jsvc binary."
+ exit 1
+ fi
+
+ # note that shellcheck will throw a
+ # bogus (for our use case) SC2086 here.
+ # it doesn't properly support multi-line situations
+
+ exec "${jsvc}" \
+ "-Dproc_${daemonname}" \
+ -outfile "${daemonoutfile}" \
+ -errfile "${daemonerrfile}" \
+ -pidfile "${daemonpidfile}" \
+ -nodetach \
+ -user "${HADOOP_SECURE_USER}" \
+ -cp "${CLASSPATH}" \
+ ${HADOOP_OPTS} \
+ "${class}" "$@"
+}
+
+function hadoop_start_secure_daemon_wrapper
+{
+ # this wraps hadoop_start_secure_daemon to take care
+ # of the dirty work to launch a daemon in the background!
+ local daemonname=$1
+ local class=$2
+
+ # same rules as hadoop_start_secure_daemon except we
+ # have some additional parameters
+
+ local daemonpidfile=$3
+
+ local daemonoutfile=$4
+
+ # the pid file of the subprocess that spawned our
+ # secure launcher
+ local jsvcpidfile=$5
+
+ # the output of the subprocess that spawned our secure
+ # launcher
+ local jsvcoutfile=$6
+
+ local daemonerrfile=$7
+ shift 7
+
+ hadoop_rotate_log "${jsvcoutfile}"
+
+ hadoop_start_secure_daemon \
+ "${daemonname}" \
+ "${class}" \
+ "${daemonpidfile}" \
+ "${daemonoutfile}" \
+ "${daemonerrfile}" "$@" >> "${jsvcoutfile}" 2>&1 < /dev/null &
+
+ # This wrapper should only have one child. Unlike Shawty Lo.
+ #shellcheck disable=SC2086
+ echo $! > "${jsvcpidfile}" 2>/dev/null
+ if [[ $? -gt 0 ]]; then
+ hadoop_error "ERROR: Cannot write pid ${pidfile}."
+ fi
+ sleep 1
+ #shellcheck disable=SC2086
+ renice "${HADOOP_NICENESS}" $! >/dev/null 2>&1
+ if [[ $? -gt 0 ]]; then
+ hadoop_error "ERROR: Cannot set priority of process $!"
+ fi
+ if [[ -f "${daemonpidfile}" ]]; then
+ #shellcheck disable=SC2046
+ renice "${HADOOP_NICENESS}" $(cat "${daemonpidfile}") >/dev/null 2>&1
+ if [[ $? -gt 0 ]]; then
+ hadoop_error "ERROR: Cannot set priority of process $(cat "${daemonpidfile}")"
+ fi
+ fi
+ #shellcheck disable=SC2086
+ disown $! 2>&1
+ if [[ $? -gt 0 ]]; then
+ hadoop_error "ERROR: Cannot disconnect process $!"
+ fi
+ # capture the ulimit output
+ su "${HADOOP_SECURE_USER}" -c 'bash -c "ulimit -a"' >> "${jsvcoutfile}" 2>&1
+ #shellcheck disable=SC2086
+ if ! ps -p $! >/dev/null 2>&1; then
+ return 1
+ fi
+ return 0
+}
+
+function hadoop_stop_daemon
+{
+ local cmd=$1
+ local pidfile=$2
+ shift 2
+
+ local pid
+
+ if [[ -f "${pidfile}" ]]; then
+ pid=$(cat "$pidfile")
+
+ kill "${pid}" >/dev/null 2>&1
+ sleep "${HADOOP_STOP_TIMEOUT}"
+ if kill -0 "${pid}" > /dev/null 2>&1; then
+ hadoop_error "WARNING: ${cmd} did not stop gracefully after ${HADOOP_STOP_TIMEOUT} seconds: Trying to kill with kill -9"
+ kill -9 "${pid}" >/dev/null 2>&1
+ fi
+ if ps -p "${pid}" > /dev/null 2>&1; then
+ hadoop_error "ERROR: Unable to kill ${pid}"
+ else
+ rm -f "${pidfile}" >/dev/null 2>&1
+ fi
+ fi
+}
+
+function hadoop_stop_secure_daemon
+{
+ local command=$1
+ local daemonpidfile=$2
+ local privpidfile=$3
+ shift 3
+ local ret
+
+ hadoop_stop_daemon "${command}" "${daemonpidfile}"
+ ret=$?
+ rm -f "${daemonpidfile}" "${privpidfile}" 2>/dev/null
+ return ${ret}
+}
+
+function hadoop_daemon_handler
+{
+ local daemonmode=$1
+ local daemonname=$2
+ local class=$3
+ local daemon_pidfile=$4
+ local daemon_outfile=$5
+ shift 5
+
+ case ${daemonmode} in
+ status)
+ hadoop_status_daemon "${daemon_pidfile}"
+ exit $?
+ ;;
+
+ stop)
+ hadoop_stop_daemon "${daemonname}" "${daemon_pidfile}"
+ exit $?
+ ;;
+
+ ##COMPAT -- older hadoops would also start daemons by default
+ start|default)
+ hadoop_verify_piddir
+ hadoop_verify_logdir
+ hadoop_status_daemon "${daemon_pidfile}"
+ if [[ $? == 0 ]]; then
+ hadoop_error "${daemonname} running as process $(cat "${daemon_pidfile}"). Stop it first."
+ exit 1
+ else
+ # stale pid file, so just remove it and continue on
+ rm -f "${daemon_pidfile}" >/dev/null 2>&1
+ fi
+ ##COMPAT - differentiate between --daemon start and nothing
+ # "nothing" shouldn't detach
+ if [[ "$daemonmode" = "default" ]]; then
+ hadoop_start_daemon "${daemonname}" "${class}" "$@"
+ else
+ hadoop_start_daemon_wrapper "${daemonname}" \
+ "${class}" "${daemon_pidfile}" "${daemon_outfile}" "$@"
+ fi
+ ;;
+ esac
+}
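+
+# Illustrative dispatch (see the hdfs script below for the real call):
+#
+#   hadoop_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}" \
+#     "${daemon_pidfile}" "${daemon_outfile}" "$@"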
+
+function hadoop_secure_daemon_handler
+{
+ local daemonmode=$1
+ local daemonname=$2
+ local classname=$3
+ local daemon_pidfile=$4
+ local daemon_outfile=$5
+ local priv_pidfile=$6
+ local priv_outfile=$7
+ local priv_errfile=$8
+ shift 8
+
+ case ${daemonmode} in
+ status)
+ hadoop_status_daemon "${daemon_pidfile}"
+ exit $?
+ ;;
+
+ stop)
+ hadoop_stop_secure_daemon "${daemonname}" \
+ "${daemon_pidfile}" "${priv_pidfile}"
+ exit $?
+ ;;
+
+ ##COMPAT -- older hadoops would also start daemons by default
+ start|default)
+ hadoop_verify_piddir
+ hadoop_verify_logdir
+ hadoop_status_daemon "${daemon_pidfile}"
+ if [[ $? == 0 ]]; then
+ hadoop_error "${daemonname} running as process $(cat "${daemon_pidfile}"). Stop it first."
+ exit 1
+ else
+ # stale pid file, so just remove it and continue on
+ rm -f "${daemon_pidfile}" >/dev/null 2>&1
+ fi
+
+ ##COMPAT - differentiate between --daemon start and nothing
+ # "nothing" shouldn't detach
+ if [[ "${daemonmode}" = "default" ]]; then
+ hadoop_start_secure_daemon "${daemonname}" "${classname}" \
+ "${daemon_pidfile}" "${daemon_outfile}" \
+ "${priv_errfile}" "$@"
+ else
+ hadoop_start_secure_daemon_wrapper "${daemonname}" "${classname}" \
+ "${daemon_pidfile}" "${daemon_outfile}" \
+ "${priv_pidfile}" "${priv_outfile}" "${priv_errfile}" "$@"
+ fi
+ ;;
+ esac
+}
+
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-layout.sh.example b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-layout.sh.example
new file mode 100644
index 0000000000..a44d3cb9ac
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-layout.sh.example
@@ -0,0 +1,93 @@
+# Copyright 2014 The Apache Software Foundation
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+##
+## VENDORS!
+##
+## This is where you can redefine the layout of Hadoop directories
+## and expect to be reasonably compatible. Needless to say, this
+## is expert level stuff and one needs to tread carefully.
+##
+## If you move HADOOP_LIBEXEC_DIR to a location other than
+## bin/../libexec, you MUST either define HADOOP_LIBEXEC_DIR
+## or ensure that HADOOP_PREFIX/libexec/hadoop-config.sh and
+## HADOOP_PREFIX/libexec/hadoop-layout.sh (this file) exist.
+
+## NOTE:
+##
+## hadoop-functions.sh gets executed BEFORE this file. So you can
+## redefine all of those functions here.
+##
+## The *-env.sh files are executed AFTER this file, but generally
+## too late to override the settings made here (though not the
+## functions!). This also means you cannot use things like
+## HADOOP_CONF_DIR in these definitions.
+
+####
+# Common disk layout
+####
+
+# Default location for the common/core Hadoop project
+# export HADOOP_COMMON_HOME=$HADOOP_PREFIX
+
+# Relative locations where components under HADOOP_COMMON_HOME are located
+# export HADOOP_COMMON_DIR="share/hadoop/common"
+# export HADOOP_COMMON_LIB_JARS_DIR="share/hadoop/common/lib"
+# export HADOOP_COMMON_LIB_NATIVE_DIR="lib/native"
+
+####
+# HDFS disk layout
+####
+
+# Default location for the HDFS subproject
+# export HADOOP_HDFS_HOME=$HADOOP_PREFIX
+
+# Relative locations where components under HADOOP_HDFS_HOME are located
+# export HDFS_DIR="share/hadoop/hdfs"
+# export HDFS_LIB_JARS_DIR="share/hadoop/hdfs/lib"
+
+####
+# YARN disk layout
+####
+
+# Default location for the YARN subproject
+# export HADOOP_YARN_HOME=$HADOOP_PREFIX
+
+# Relative locations where components under HADOOP_YARN_HOME are located
+# export YARN_DIR="share/hadoop/yarn"
+# export YARN_LIB_JARS_DIR="share/hadoop/yarn/lib"
+
+####
+# MapReduce disk layout
+####
+
+# Default location for the MapReduce subproject
+# export HADOOP_MAPRED_HOME=$HADOOP_PREFIX
+
+# Relative locations where components under HADOOP_MAPRED_HOME are located
+# export MAPRED_DIR="share/hadoop/mapreduce"
+# export MAPRED_LIB_JARS_DIR="share/hadoop/mapreduce/lib"
+
+####
+# Misc paths
+####
+
+# Set up a default TOOL_PATH, which is where things like distcp live.
+# Note that this path only gets added for certain commands and is not
+# part of the general classpath.
+# export TOOL_PATH="$HADOOP_PREFIX/share/hadoop/tools/lib/*"
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/rcc b/hadoop-common-project/hadoop-common/src/main/bin/rcc
index 22bffffbf2..dc6158a8ea 100755
--- a/hadoop-common-project/hadoop-common/src/main/bin/rcc
+++ b/hadoop-common-project/hadoop-common/src/main/bin/rcc
@@ -15,47 +15,28 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-
-# The Hadoop record compiler
-#
-# Environment Variables
-#
-# JAVA_HOME The java implementation to use. Overrides JAVA_HOME.
-#
-# HADOOP_OPTS Extra Java runtime options.
-#
-# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_PREFIX}/conf.
-#
-
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
+# This script runs the Hadoop record compiler.
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "$this")" >/dev/null && pwd -P)
+script="$(basename -- "$this")"
+this="$bin/$script"
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
+HADOOP_NEW_CONFIG=true
+. "$HADOOP_LIBEXEC_DIR/hadoop-config.sh"
-if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
- . "${HADOOP_CONF_DIR}/hadoop-env.sh"
+if [ $# = 0 ]; then
+ hadoop_exit_with_usage 1
fi
-# some Java parameters
-if [ "$JAVA_HOME" != "" ]; then
- #echo "run java in $JAVA_HOME"
- JAVA_HOME=$JAVA_HOME
-fi
-
-if [ "$JAVA_HOME" = "" ]; then
- echo "Error: JAVA_HOME is not set."
- exit 1
-fi
-
-JAVA=$JAVA_HOME/bin/java
-JAVA_HEAP_MAX=-Xmx1000m
-
-# restore ordinary behaviour
-unset IFS
-
CLASS='org.apache.hadoop.record.compiler.generated.Rcc'
-# run it
-exec "$JAVA" $HADOOP_OPTS -classpath "$CLASSPATH" $CLASS "$@"
+# Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS
+HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+
+hadoop_add_param HADOOP_OPTS Xmx "$JAVA_HEAP_MAX"
+
+hadoop_finalize
+export CLASSPATH
+hadoop_java_exec rcc "${CLASS}" "$@"
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/slaves.sh b/hadoop-common-project/hadoop-common/src/main/bin/slaves.sh
index fe164a8924..0ffb4849cf 100755
--- a/hadoop-common-project/hadoop-common/src/main/bin/slaves.sh
+++ b/hadoop-common-project/hadoop-common/src/main/bin/slaves.sh
@@ -27,38 +27,33 @@
# HADOOP_SSH_OPTS Options passed to ssh when running remote commands.
##
-usage="Usage: slaves.sh [--config confdir] command..."
+function hadoop_usage {
+ echo "Usage: slaves.sh [--config confdir] command..."
+}
-# if no args specified, show usage
-if [ $# -le 0 ]; then
- echo $usage
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+ DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+ this="${BASH_SOURCE-$0}"
+ bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+ DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
+ . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
+else
+ echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
exit 1
fi
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
-
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
-
-
-# Where to start the script, see hadoop-config.sh
-# (it set up the variables based on command line options)
-if [ "$HADOOP_SLAVE_NAMES" != '' ] ; then
- SLAVE_NAMES=$HADOOP_SLAVE_NAMES
-else
- SLAVE_FILE=${HADOOP_SLAVES:-${HADOOP_CONF_DIR}/slaves}
- SLAVE_NAMES=$(cat "$SLAVE_FILE" | sed 's/#.*$//;/^$/d')
+# if no args specified, show usage
+if [[ $# -le 0 ]]; then
+ hadoop_exit_with_usage 1
fi
-# start the daemons
-for slave in $SLAVE_NAMES ; do
- ssh $HADOOP_SSH_OPTS $slave $"${@// /\\ }" \
- 2>&1 | sed "s/^/$slave: /" &
- if [ "$HADOOP_SLAVE_SLEEP" != "" ]; then
- sleep $HADOOP_SLAVE_SLEEP
- fi
-done
+hadoop_connect_to_hosts "$@"
-wait
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/start-all.sh b/hadoop-common-project/hadoop-common/src/main/bin/start-all.sh
index 312432801f..edd1b934dd 100755
--- a/hadoop-common-project/hadoop-common/src/main/bin/start-all.sh
+++ b/hadoop-common-project/hadoop-common/src/main/bin/start-all.sh
@@ -15,24 +15,38 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+echo "This script is deprecated. Use start-dfs.sh and start-yarn.sh instead."
+exit 1
-# Start all hadoop daemons. Run this on master node.
-echo "This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh"
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
-
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+ DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+ this="${BASH_SOURCE-$0}"
+ bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+ DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
+ . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
+else
+ echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
+ exit 1
+fi
# start hdfs daemons if hdfs is present
-if [ -f "${HADOOP_HDFS_HOME}"/sbin/start-dfs.sh ]; then
- "${HADOOP_HDFS_HOME}"/sbin/start-dfs.sh --config $HADOOP_CONF_DIR
+if [[ -f "${HADOOP_HDFS_HOME}/sbin/start-dfs.sh" ]]; then
+ "${HADOOP_HDFS_HOME}/sbin/start-dfs.sh" --config "${HADOOP_CONF_DIR}"
fi
# start yarn daemons if yarn is present
-if [ -f "${HADOOP_YARN_HOME}"/sbin/start-yarn.sh ]; then
- "${HADOOP_YARN_HOME}"/sbin/start-yarn.sh --config $HADOOP_CONF_DIR
+if [[ -f "${HADOOP_YARN_HOME}/sbin/start-yarn.sh" ]]; then
+ "${HADOOP_YARN_HOME}/sbin/start-yarn.sh" --config "${HADOOP_CONF_DIR}"
fi
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/stop-all.sh b/hadoop-common-project/hadoop-common/src/main/bin/stop-all.sh
index 9a2fe98fc0..a0311e482e 100755
--- a/hadoop-common-project/hadoop-common/src/main/bin/stop-all.sh
+++ b/hadoop-common-project/hadoop-common/src/main/bin/stop-all.sh
@@ -18,21 +18,35 @@
# Stop all hadoop daemons. Run this on master node.
-echo "This script is Deprecated. Instead use stop-dfs.sh and stop-yarn.sh"
+echo "This script is deprecated. Use stop-dfs.sh and stop-yarn.sh instead."
+exit 1
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+ DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+ this="${BASH_SOURCE-$0}"
+ bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+ DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
+ . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
+else
+ echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
+ exit 1
+fi
# stop hdfs daemons if hdfs is present
-if [ -f "${HADOOP_HDFS_HOME}"/sbin/stop-dfs.sh ]; then
- "${HADOOP_HDFS_HOME}"/sbin/stop-dfs.sh --config $HADOOP_CONF_DIR
+if [[ -f "${HADOOP_HDFS_HOME}/sbin/stop-dfs.sh" ]]; then
+ "${HADOOP_HDFS_HOME}/sbin/stop-dfs.sh" --config "${HADOOP_CONF_DIR}"
fi
# stop yarn daemons if yarn is present
-if [ -f "${HADOOP_HDFS_HOME}"/sbin/stop-yarn.sh ]; then
- "${HADOOP_HDFS_HOME}"/sbin/stop-yarn.sh --config $HADOOP_CONF_DIR
+if [[ -f "${HADOOP_HDFS_HOME}/sbin/stop-yarn.sh" ]]; then
+ "${HADOOP_HDFS_HOME}/sbin/stop-yarn.sh" --config "${HADOOP_CONF_DIR}"
fi
+
diff --git a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh
index f264389408..f50e412663 100644
--- a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh
+++ b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh
@@ -1,3 +1,4 @@
+#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -16,71 +17,393 @@
# Set Hadoop-specific environment variables here.
-# The only required environment variable is JAVA_HOME. All others are
-# optional. When running a distributed configuration it is best to
-# set JAVA_HOME in this file, so that it is correctly defined on
-# remote nodes.
+##
+## THIS FILE ACTS AS THE MASTER FILE FOR ALL HADOOP PROJECTS.
+## SETTINGS HERE WILL BE READ BY ALL HADOOP COMMANDS. THEREFORE,
+## ONE CAN USE THIS FILE TO SET YARN, HDFS, AND MAPREDUCE
+## CONFIGURATION OPTIONS INSTEAD OF xxx-env.sh.
+##
+## Precedence rules:
+##
+## {yarn-env.sh|hdfs-env.sh} > hadoop-env.sh > hard-coded defaults
+##
+## {YARN_xyz|HDFS_xyz} > HADOOP_xyz > hard-coded defaults
+##
+
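+# For example (illustrative): if this file sets HADOOP_HEAPSIZE=1024
+# and hdfs-env.sh sets HADOOP_HEAPSIZE=2048, then HDFS commands will
+# use 2048 while all other commands use 1024.
+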
+# Many of the options here are built from the perspective that users
+# may want to provide OVERRIDING values on the command line.
+# For example:
+#
+# JAVA_HOME=/usr/java/testing hdfs dfs -ls
+#
+# Therefore, the vast majority (BUT NOT ALL!) of these defaults
+# are configured for substitution and not append. If you would
+# like to append, you'll need to modify this file accordingly.
+
+###
+# Generic settings for HADOOP
+###
+
+# Technically, the only required environment variable is JAVA_HOME.
+# All others are optional. However, our defaults are probably not
+# your defaults. Many sites configure these options outside of Hadoop,
+# such as in /etc/profile.d
# The java implementation to use.
-export JAVA_HOME=${JAVA_HOME}
+export JAVA_HOME=${JAVA_HOME:-"hadoop-env.sh is not configured"}
-# The jsvc implementation to use. Jsvc is required to run secure datanodes.
-#export JSVC_HOME=${JSVC_HOME}
+# Location of Hadoop's configuration information, i.e., where this
+# file probably lives. You will almost certainly want to set
+# this in /etc/profile.d or equivalent.
+# export HADOOP_CONF_DIR=$HADOOP_PREFIX/etc/hadoop
-export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}
+# The maximum amount of heap to use, in MB. Default is 1024.
+# export HADOOP_HEAPSIZE=1024
-# Extra Java CLASSPATH elements. Automatically insert capacity-scheduler.
-for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
- if [ "$HADOOP_CLASSPATH" ]; then
- export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
- else
- export HADOOP_CLASSPATH=$f
- fi
-done
+# Extra Java runtime options for all Hadoop commands. We don't support
+# IPv6 yet/still, so by default we set preference to IPv4.
+# export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true"
-# The maximum amount of heap to use, in MB. Default is 1000.
-#export HADOOP_HEAPSIZE=
-#export HADOOP_NAMENODE_INIT_HEAPSIZE=""
+# Some parts of the shell code may do special things dependent upon
+# the operating system. We have to set this here. See the next
+# section as to why....
+export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)}
-# Extra Java runtime options. Empty by default.
-export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
-MAC_OSX=false
-case "`uname`" in
-Darwin*) MAC_OSX=true;;
+# Under certain conditions, Java on OS X will throw SCDynamicStore errors
+# in the system logs.
+# See HADOOP-8719 for more information. If you need Kerberos
+# support on OS X, you'll want to change/remove this extra bit.
+case ${HADOOP_OS_TYPE} in
+ Darwin*)
+ export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.realm= "
+ export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.kdc= "
+ export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.conf= "
+ ;;
esac
-if $MAC_OSX; then
- export HADOOP_OPTS="$HADOOP_OPTS -Djava.security.krb5.realm= -Djava.security.krb5.kdc="
-fi
-# Command specific options appended to HADOOP_OPTS when specified
-export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
-export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"
+# Extra Java runtime options for Hadoop clients (i.e., hdfs dfs -blah)
+# These get added to HADOOP_OPTS for such commands. In most cases,
+# this should be left empty and let users supply it on the
+# command line.
+# extra HADOOP_CLIENT_OPTS=""
-export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"
+#
+# A note about classpaths.
+#
+# The classpath is configured such that entries are stripped prior
+# to handing to Java based either upon duplication or non-existence.
+# Wildcards and/or directories are *NOT* expanded as the
+# de-duplication is fairly simple. So if two directories are in
+# the classpath that both contain awesome-methods-1.0.jar,
+# awesome-methods-1.0.jar will still be seen by java. But if
+# the classpath specifically has awesome-methods-1.0.jar from the
+# same directory listed twice, the last one will be removed.
+#
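+# For example (illustrative):
+#   /opt/a:/opt/b:/opt/a  ->  /opt/a:/opt/b
+#   /opt/a/*:/opt/b/*     ->  left alone; wildcards are not expanded
+#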
-export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
-export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"
+# An additional, custom CLASSPATH. This is really meant for
+# end users, but as an administrator, one might want to push
+# something extra in here too, such as the jar to the topology
+# method. Just be sure to append to the existing HADOOP_USER_CLASSPATH
+# so end users have a way to add stuff.
+# export HADOOP_USER_CLASSPATH="/some/cool/path/on/your/machine"
-# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
-export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
-#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"
+# Should HADOOP_USER_CLASSPATH be first in the official CLASSPATH?
+# export HADOOP_USER_CLASSPATH_FIRST="yes"
-# On secure datanodes, user to run the datanode as after dropping privileges
-export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}
+###
+# Options for remote shell connectivity
+###
-# Where log files are stored. $HADOOP_HOME/logs by default.
-#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER
+# There are some optional components of hadoop that allow for
+# command and control of remote hosts. For example,
+# start-dfs.sh will attempt to bring up all NNs, DNs, etc.
-# Where log files are stored in the secure data environment.
-export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
+# Options to pass to SSH when one of the "log into a host and
+# start/stop daemons" scripts is executed
+# export HADOOP_SSH_OPTS="-o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=10"
-# The directory where pid files are stored. /tmp by default.
-# NOTE: this should be set to a directory that can only be written to by
-# the user that will run the hadoop daemons. Otherwise there is the
-# potential for a symlink attack.
-export HADOOP_PID_DIR=${HADOOP_PID_DIR}
-export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}
+# The built-in ssh handler will limit itself to 10 simultaneous connections.
+# For pdsh users, this sets the fanout size ( -f )
+# Change this to increase/decrease as necessary.
+# export HADOOP_SSH_PARALLEL=10
+
+# Filename which contains all of the hosts for any remote execution
+# helper scripts such as slaves.sh, start-dfs.sh, etc.
+# export HADOOP_SLAVES="${HADOOP_CONF_DIR}/slaves"
+
+###
+# Options for all daemons
+###
+#
+
+#
+# You can define variables right here and then re-use them later on.
+# For example, it is common to use the same garbage collection settings
+# for all the daemons. So we could define:
+#
+# export HADOOP_GC_SETTINGS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
+#
+# ... and then use it as in option (b) under the NameNode section below.
+
+# Where (primarily) daemon log files are stored.
+# $HADOOP_PREFIX/logs by default.
+# export HADOOP_LOG_DIR=${HADOOP_PREFIX}/logs
# A string representing this instance of hadoop. $USER by default.
-export HADOOP_IDENT_STRING=$USER
+# This is used in writing log and pid files, so keep that in mind!
+# export HADOOP_IDENT_STRING=$USER
+
+# How many seconds to pause after stopping a daemon
+# export HADOOP_STOP_TIMEOUT=5
+
+# Where pid files are stored. /tmp by default.
+# export HADOOP_PID_DIR=/tmp
+
+# Default log level and output location
+# This sets the hadoop.root.logger property
+# export HADOOP_ROOT_LOGGER=INFO,console
+
+# Default log level for daemons spawned explicitly by hadoop-daemon.sh
+# This sets the hadoop.root.logger property
+# export HADOOP_DAEMON_ROOT_LOGGER=INFO,RFA
+
+# Default log level and output location for security-related messages.
+# It sets -Dhadoop.security.logger on the command line.
+# You will almost certainly want to change this on a per-daemon basis!
+# export HADOOP_SECURITY_LOGGER=INFO,NullAppender
+
+# Default log level for file system audit messages.
+# It sets -Dhdfs.audit.logger on the command line.
+# You will almost certainly want to change this on a per-daemon basis!
+# export HADOOP_AUDIT_LOGGER=INFO,NullAppender
+
+# Default process priority level
+# Note that sub-processes will also run at this level!
+# export HADOOP_NICENESS=0
+
+# Default name for the service level authorization file
+# export HADOOP_POLICYFILE="hadoop-policy.xml"
+
+###
+# Secure/privileged execution
+###
+
+#
+# Out of the box, Hadoop uses jsvc from Apache Commons to launch daemons
+# on privileged ports. This functionality can be replaced by providing
+# custom functions. See hadoop-functions.sh for more information.
+#
+
+# The jsvc implementation to use. Jsvc is required to run secure datanodes.
+# export JSVC_HOME=/usr/bin
+
+#
+# This directory contains pids for secure and privileged processes.
+# export HADOOP_SECURE_PID_DIR=${HADOOP_PID_DIR}
+
+#
+# This directory contains the logs for secure and privileged processes.
+# export HADOOP_SECURE_LOG_DIR=${HADOOP_LOG_DIR}
+
+#
+# When running a secure daemon, the default value of HADOOP_IDENT_STRING
+# ends up being a bit bogus. Therefore, by default, the code will
+# replace HADOOP_IDENT_STRING with HADOOP_SECURE_xx_USER. If you want
+# to keep HADOOP_IDENT_STRING untouched, then uncomment this line.
+# export HADOOP_SECURE_IDENT_PRESERVE="true"
+
+###
+# NameNode specific parameters
+###
+# Specify the JVM options to be used when starting the NameNode.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# a) Set JMX options
+# export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote=true -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=1026"
+#
+# b) Set garbage collection logs
+# export HADOOP_NAMENODE_OPTS="${HADOOP_GC_SETTINGS} -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"
+#
+# c) ... or set them directly
+# export HADOOP_NAMENODE_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"
+
+# this is the default:
+# export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender"
+
+###
+# SecondaryNameNode specific parameters
+###
+# Specify the JVM options to be used when starting the SecondaryNameNode.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# This is the default:
+# export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender"
+
+###
+# DataNode specific parameters
+###
+# Specify the JVM options to be used when starting the DataNode.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# This is the default:
+# export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS"
+
+# On secure datanodes, user to run the datanode as after dropping privileges
+# This **MUST** be uncommented to enable secure HDFS!
+# export HADOOP_SECURE_DN_USER=hdfs
+
+# Supplemental options for secure datanodes
+# By default, we use jsvc which needs to know to launch a
+# server jvm.
+# export HADOOP_DN_SECURE_EXTRA_OPTS="-jvm server"
+
+# Where datanode log files are stored in the secure data environment.
+# export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_SECURE_LOG_DIR}
+
+# Where datanode pid files are stored in the secure data environment.
+# export HADOOP_SECURE_DN_PID_DIR=${HADOOP_SECURE_PID_DIR}
+
+###
+# NFS3 Gateway specific parameters
+###
+# Specify the JVM options to be used when starting the NFS3 Gateway.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# export HADOOP_NFS3_OPTS=""
+
+# Specify the JVM options to be used when starting the Hadoop portmapper.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# export HADOOP_PORTMAP_OPTS="-Xmx512m"
+
+# Supplemental options for privileged gateways
+# By default, we use jsvc which needs to know to launch a
+# server jvm.
+# export HADOOP_NFS3_SECURE_EXTRA_OPTS="-jvm server"
+
+# On privileged gateways, user to run the gateway as after dropping privileges
+# export HADOOP_PRIVILEGED_NFS_USER=nfsserver
+
+###
+# ZKFailoverController specific parameters
+###
+# Specify the JVM options to be used when starting the ZKFailoverController.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# export HADOOP_ZKFC_OPTS=""
+
+###
+# QuorumJournalNode specific parameters
+###
+# Specify the JVM options to be used when starting the QuorumJournalNode.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# export HADOOP_JOURNALNODE_OPTS=""
+
+###
+# HDFS Balancer specific parameters
+###
+# Specify the JVM options to be used when starting the HDFS Balancer.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# export HADOOP_BALANCER_OPTS=""
+
+###
+# Advanced Users Only!
+###
+
+#
+# When building Hadoop, you can add the class paths to your commands
+# via this special env var:
+# HADOOP_ENABLE_BUILD_PATHS="true"
+
+# You can do things like replace parts of the shell underbelly.
+# Most of this code is in hadoop-functions.sh.
+#
+#
+# For example, if you want to add compression to the rotation
+# method for the .out files that daemons generate, you can do
+# that by redefining the hadoop_rotate_log function by
+# uncommenting this code block:
+
+#function hadoop_rotate_log
+#{
+# #
+# # log rotation (mainly used for .out files)
+# # Users are likely to replace this one for something
+# # that gzips or uses dates or who knows what.
+# #
+# # be aware that &1 and &2 might go through here
+# # so don't do anything too crazy...
+# #
+# local log=$1;
+# local num=${2:-5};
+#
+# if [[ -f "${log}" ]]; then # rotate logs
+# while [[ ${num} -gt 1 ]]; do
+# #shellcheck disable=SC2086
+# let prev=${num}-1
+# if [[ -f "${log}.${prev}" ]]; then
+# mv "${log}.${prev}" "${log}.${num}"
+# fi
+# num=${prev}
+# done
+# mv "${log}" "${log}.${num}"
+# gzip -9 "${log}.${num}"
+# fi
+#}
+#
+#
+# Another example: finding java
+#
+# By default, Hadoop assumes that $JAVA_HOME is always defined
+# outside of its configuration. Eons ago, Apple standardized
+# on a helper program called java_home to find it for you.
+#
+#function hadoop_java_setup
+#{
+#
+# if [[ -z "${JAVA_HOME}" ]]; then
+# case $HADOOP_OS_TYPE in
+# Darwin*)
+# JAVA_HOME=$(/usr/libexec/java_home)
+# ;;
+# esac
+# fi
+#
+# # Bail if we did not detect it
+# if [[ -z "${JAVA_HOME}" ]]; then
+# echo "ERROR: JAVA_HOME is not set and could not be found." 1>&2
+# exit 1
+# fi
+#
+# if [[ ! -d "${JAVA_HOME}" ]]; then
+# echo "ERROR: JAVA_HOME (${JAVA_HOME}) does not exist." 1>&2
+# exit 1
+# fi
+#
+# JAVA="${JAVA_HOME}/bin/java"
+#
+# if [[ ! -x ${JAVA} ]]; then
+# echo "ERROR: ${JAVA} is not executable." 1>&2
+# exit 1
+# fi
+# JAVA_HEAP_MAX=-Xmx1g
+# HADOOP_HEAPSIZE=${HADOOP_HEAPSIZE:-128}
+#
+# # check envvars which might override default args
+# if [[ -n "$HADOOP_HEAPSIZE" ]]; then
+# JAVA_HEAP_MAX="-Xmx${HADOOP_HEAPSIZE}m"
+# fi
+#}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/distribute-exclude.sh b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/distribute-exclude.sh
index 66fc14a246..3c0b9af3ff 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/distribute-exclude.sh
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/distribute-exclude.sh
@@ -57,9 +57,9 @@ excludeFilenameRemote=$("$HADOOP_PREFIX/bin/hdfs" getconf -excludeFile)
if [ "$excludeFilenameRemote" = '' ] ; then
echo \
- "Error: hdfs getconf -excludeFile returned empty string, " \
- "please setup dfs.hosts.exclude in hdfs-site.xml in local cluster " \
- "configuration and on all namenodes"
+ "Error: hdfs getconf -excludeFile returned empty string, " \
+ "please setup dfs.hosts.exclude in hdfs-site.xml in local cluster " \
+ "configuration and on all namenodes"
exit 1
fi
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs
index fa2d863a75..bb5636217a 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs
@@ -15,250 +15,237 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# Environment Variables
-#
-# JSVC_HOME home directory of jsvc binary. Required for starting secure
-# datanode.
-#
-# JSVC_OUTFILE path to jsvc output file. Defaults to
-# $HADOOP_LOG_DIR/jsvc.out.
-#
-# JSVC_ERRFILE path to jsvc error file. Defaults to $HADOOP_LOG_DIR/jsvc.err.
-
-bin=`which $0`
-bin=`dirname ${bin}`
-bin=`cd "$bin" > /dev/null; pwd`
-
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
-
-function print_usage(){
- echo "Usage: hdfs [--config confdir] COMMAND"
+function hadoop_usage
+{
+ echo "Usage: hdfs [--config confdir] [--daemon (start|stop|status)] COMMAND"
echo " where COMMAND is one of:"
- echo " dfs run a filesystem command on the file systems supported in Hadoop."
- echo " namenode -format format the DFS filesystem"
- echo " secondarynamenode run the DFS secondary namenode"
- echo " namenode run the DFS namenode"
- echo " journalnode run the DFS journalnode"
- echo " zkfc run the ZK Failover Controller daemon"
- echo " datanode run a DFS datanode"
- echo " dfsadmin run a DFS admin client"
- echo " haadmin run a DFS HA admin client"
- echo " fsck run a DFS filesystem checking utility"
echo " balancer run a cluster balancing utility"
- echo " jmxget get JMX exported values from NameNode or DataNode."
- echo " oiv apply the offline fsimage viewer to an fsimage"
- echo " oiv_legacy apply the offline fsimage viewer to an legacy fsimage"
- echo " oev apply the offline edits viewer to an edits file"
+ echo " cacheadmin configure the HDFS cache"
+ echo " classpath prints the class path needed to get the"
+ echo " Hadoop jar and the required libraries"
+ echo " datanode run a DFS datanode"
+ echo " dfs run a filesystem command on the file system"
+ echo " dfsadmin run a DFS admin client"
echo " fetchdt fetch a delegation token from the NameNode"
+ echo " fsck run a DFS filesystem checking utility"
echo " getconf get config values from configuration"
echo " groups get the groups which users belong to"
+ echo " haadmin run a DFS HA admin client"
+ echo " jmxget get JMX exported values from NameNode or DataNode."
+ echo " journalnode run the DFS journalnode"
+ echo " lsSnapshottableDir list all snapshottable dirs owned by the current user"
+ echo " Use -help to see options"
+ echo " namenode run the DFS namenode"
+ echo " Use -format to initialize the DFS filesystem"
+ echo " nfs3 run an NFS version 3 gateway"
+ echo " oev apply the offline edits viewer to an edits file"
+ echo " oiv apply the offline fsimage viewer to an fsimage"
+ echo " oiv_legacy apply the offline fsimage viewer to a legacy fsimage"
+ echo " portmap run a portmap service"
+ echo " secondarynamenode run the DFS secondary namenode"
echo " snapshotDiff diff two snapshots of a directory or diff the"
echo " current directory contents with a snapshot"
- echo " lsSnapshottableDir list all snapshottable dirs owned by the current user"
- echo " Use -help to see options"
- echo " portmap run a portmap service"
- echo " nfs3 run an NFS version 3 gateway"
- echo " cacheadmin configure the HDFS cache"
+ echo " zkfc run the ZK Failover Controller daemon"
echo ""
echo "Most commands print help when invoked w/o parameters."
}
-if [ $# = 0 ]; then
- print_usage
- exit
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+ DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+ this="${BASH_SOURCE-$0}"
+ bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+ DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
+ . "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
+else
+ echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
+ exit 1
+fi
+
+if [[ $# = 0 ]]; then
+ hadoop_exit_with_usage 1
fi
COMMAND=$1
shift
-case $COMMAND in
- # usage flags
- --help|-help|-h)
- print_usage
+case ${COMMAND} in
+ balancer)
+ CLASS=org.apache.hadoop.hdfs.server.balancer.Balancer
+ HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_BALANCER_OPTS}"
+ ;;
+ cacheadmin)
+ CLASS=org.apache.hadoop.hdfs.tools.CacheAdmin
+ ;;
+ classpath)
+ hadoop_finalize
+ echo "${CLASSPATH}"
exit
- ;;
+ ;;
+ datanode)
+ daemon="true"
+ # Determine if we're starting a secure datanode, and
+ # if so, redefine appropriate variables
+ if [[ -n "${HADOOP_SECURE_DN_USER}" ]]; then
+ secure_service="true"
+ secure_user="${HADOOP_SECURE_DN_USER}"
+
+ # backward compatibility
+ HADOOP_SECURE_PID_DIR="${HADOOP_SECURE_PID_DIR:-$HADOOP_SECURE_DN_PID_DIR}"
+ HADOOP_SECURE_LOG_DIR="${HADOOP_SECURE_LOG_DIR:-$HADOOP_SECURE_DN_LOG_DIR}"
+
+ HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_DN_SECURE_EXTRA_OPTS} ${HADOOP_DATANODE_OPTS}"
+ CLASS="org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter"
+ else
+ HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_DATANODE_OPTS}"
+ CLASS='org.apache.hadoop.hdfs.server.datanode.DataNode'
+ fi
+ ;;
+ dfs)
+ CLASS=org.apache.hadoop.fs.FsShell
+ HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
+ ;;
+ dfsadmin)
+ CLASS=org.apache.hadoop.hdfs.tools.DFSAdmin
+ HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
+ ;;
+ fetchdt)
+ CLASS=org.apache.hadoop.hdfs.tools.DelegationTokenFetcher
+ ;;
+ fsck)
+ CLASS=org.apache.hadoop.hdfs.tools.DFSck
+ HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
+ ;;
+ getconf)
+ CLASS=org.apache.hadoop.hdfs.tools.GetConf
+ ;;
+ groups)
+ CLASS=org.apache.hadoop.hdfs.tools.GetGroups
+ ;;
+ haadmin)
+ CLASS=org.apache.hadoop.hdfs.tools.DFSHAAdmin
+ CLASSPATH="${CLASSPATH}:${TOOL_PATH}"
+ HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
+ ;;
+ journalnode)
+ daemon="true"
+ CLASS='org.apache.hadoop.hdfs.qjournal.server.JournalNode'
+ HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_JOURNALNODE_OPTS}"
+ ;;
+ jmxget)
+ CLASS=org.apache.hadoop.hdfs.tools.JMXGet
+ ;;
+ lsSnapshottableDir)
+ CLASS=org.apache.hadoop.hdfs.tools.snapshot.LsSnapshottableDir
+ ;;
+ namenode)
+ daemon="true"
+ CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode'
+ HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NAMENODE_OPTS}"
+ ;;
+ nfs3)
+ daemon="true"
+ if [[ -n "${HADOOP_PRIVILEGED_NFS_USER}" ]]; then
+ secure_service="true"
+ secure_user="${HADOOP_PRIVILEGED_NFS_USER}"
+
+ # backward compatibility
+ HADOOP_SECURE_PID_DIR="${HADOOP_SECURE_PID_DIR:-$HADOOP_SECURE_NFS3_PID_DIR}"
+ HADOOP_SECURE_LOG_DIR="${HADOOP_SECURE_LOG_DIR:-$HADOOP_SECURE_NFS3_LOG_DIR}"
+
+ HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NFS3_SECURE_EXTRA_OPTS} ${HADOOP_NFS3_OPTS}"
+ CLASS=org.apache.hadoop.hdfs.nfs.nfs3.PrivilegedNfsGatewayStarter
+ else
+ HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NFS3_OPTS}"
+ CLASS=org.apache.hadoop.hdfs.nfs.nfs3.Nfs3
+ fi
+ ;;
+ oev)
+ CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
+ ;;
+ oiv)
+ CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewerPB
+ ;;
+ oiv_legacy)
+ CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer
+ ;;
+ portmap)
+ daemon="true"
+ CLASS=org.apache.hadoop.portmap.Portmap
+ HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_PORTMAP_OPTS}"
+ ;;
+ secondarynamenode)
+ daemon="true"
+ CLASS='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode'
+ HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_SECONDARYNAMENODE_OPTS}"
+ ;;
+ snapshotDiff)
+ CLASS=org.apache.hadoop.hdfs.tools.snapshot.SnapshotDiff
+ ;;
+ zkfc)
+ daemon="true"
+ CLASS='org.apache.hadoop.hdfs.tools.DFSZKFailoverController'
+ HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_ZKFC_OPTS}"
+ ;;
+ -*)
+ hadoop_exit_with_usage 1
+ ;;
+ *)
+ CLASS="${COMMAND}"
+ ;;
esac
-# Determine if we're starting a secure datanode, and if so, redefine appropriate variables
-if [ "$COMMAND" == "datanode" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then
- if [ -n "$JSVC_HOME" ]; then
- if [ -n "$HADOOP_SECURE_DN_PID_DIR" ]; then
- HADOOP_PID_DIR=$HADOOP_SECURE_DN_PID_DIR
- fi
-
- if [ -n "$HADOOP_SECURE_DN_LOG_DIR" ]; then
- HADOOP_LOG_DIR=$HADOOP_SECURE_DN_LOG_DIR
- HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR"
- fi
-
- HADOOP_IDENT_STRING=$HADOOP_SECURE_DN_USER
- HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING"
- starting_secure_dn="true"
- else
- echo "It looks like you're trying to start a secure DN, but \$JSVC_HOME"\
- "isn't set. Falling back to starting insecure DN."
+if [[ -n "${secure_service}" ]]; then
+ HADOOP_SECURE_USER="${secure_user}"
+ if hadoop_verify_secure_prereq; then
+ hadoop_setup_secure_service
+ priv_outfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${COMMAND-$HOSTNAME}.out"
+ priv_errfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${COMMAND-$HOSTNAME}.err"
+ priv_pidfile="${HADOOP_PID_DIR}/privileged-${HADOOP_IDENT_STRING}-${COMMAND-$HOSTNAME}.pid"
+ daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out"
+ daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${COMMAND}.pid"
fi
-fi
-
-# Determine if we're starting a privileged NFS daemon, and if so, redefine appropriate variables
-if [ "$COMMAND" == "nfs3" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_PRIVILEGED_NFS_USER" ]; then
- if [ -n "$JSVC_HOME" ]; then
- if [ -n "$HADOOP_PRIVILEGED_NFS_PID_DIR" ]; then
- HADOOP_PID_DIR=$HADOOP_PRIVILEGED_NFS_PID_DIR
- fi
-
- if [ -n "$HADOOP_PRIVILEGED_NFS_LOG_DIR" ]; then
- HADOOP_LOG_DIR=$HADOOP_PRIVILEGED_NFS_LOG_DIR
- HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR"
- fi
-
- HADOOP_IDENT_STRING=$HADOOP_PRIVILEGED_NFS_USER
- HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING"
- starting_privileged_nfs="true"
- else
- echo "It looks like you're trying to start a privileged NFS server, but"\
- "\$JSVC_HOME isn't set. Falling back to starting unprivileged NFS server."
- fi
-fi
-
-if [ "$COMMAND" = "namenode" ] ; then
- CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode'
- HADOOP_OPTS="$HADOOP_OPTS $HADOOP_NAMENODE_OPTS"
-elif [ "$COMMAND" = "zkfc" ] ; then
- CLASS='org.apache.hadoop.hdfs.tools.DFSZKFailoverController'
- HADOOP_OPTS="$HADOOP_OPTS $HADOOP_ZKFC_OPTS"
-elif [ "$COMMAND" = "secondarynamenode" ] ; then
- CLASS='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode'
- HADOOP_OPTS="$HADOOP_OPTS $HADOOP_SECONDARYNAMENODE_OPTS"
-elif [ "$COMMAND" = "datanode" ] ; then
- CLASS='org.apache.hadoop.hdfs.server.datanode.DataNode'
- if [ "$starting_secure_dn" = "true" ]; then
- HADOOP_OPTS="$HADOOP_OPTS -jvm server $HADOOP_DATANODE_OPTS"
- else
- HADOOP_OPTS="$HADOOP_OPTS -server $HADOOP_DATANODE_OPTS"
- fi
-elif [ "$COMMAND" = "journalnode" ] ; then
- CLASS='org.apache.hadoop.hdfs.qjournal.server.JournalNode'
- HADOOP_OPTS="$HADOOP_OPTS $HADOOP_JOURNALNODE_OPTS"
-elif [ "$COMMAND" = "dfs" ] ; then
- CLASS=org.apache.hadoop.fs.FsShell
- HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-elif [ "$COMMAND" = "dfsadmin" ] ; then
- CLASS=org.apache.hadoop.hdfs.tools.DFSAdmin
- HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-elif [ "$COMMAND" = "haadmin" ] ; then
- CLASS=org.apache.hadoop.hdfs.tools.DFSHAAdmin
- CLASSPATH=${CLASSPATH}:${TOOL_PATH}
- HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-elif [ "$COMMAND" = "fsck" ] ; then
- CLASS=org.apache.hadoop.hdfs.tools.DFSck
- HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-elif [ "$COMMAND" = "balancer" ] ; then
- CLASS=org.apache.hadoop.hdfs.server.balancer.Balancer
- HADOOP_OPTS="$HADOOP_OPTS $HADOOP_BALANCER_OPTS"
-elif [ "$COMMAND" = "jmxget" ] ; then
- CLASS=org.apache.hadoop.hdfs.tools.JMXGet
-elif [ "$COMMAND" = "oiv" ] ; then
- CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewerPB
-elif [ "$COMMAND" = "oiv_legacy" ] ; then
- CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer
-elif [ "$COMMAND" = "oev" ] ; then
- CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
-elif [ "$COMMAND" = "fetchdt" ] ; then
- CLASS=org.apache.hadoop.hdfs.tools.DelegationTokenFetcher
-elif [ "$COMMAND" = "getconf" ] ; then
- CLASS=org.apache.hadoop.hdfs.tools.GetConf
-elif [ "$COMMAND" = "groups" ] ; then
- CLASS=org.apache.hadoop.hdfs.tools.GetGroups
-elif [ "$COMMAND" = "snapshotDiff" ] ; then
- CLASS=org.apache.hadoop.hdfs.tools.snapshot.SnapshotDiff
-elif [ "$COMMAND" = "lsSnapshottableDir" ] ; then
- CLASS=org.apache.hadoop.hdfs.tools.snapshot.LsSnapshottableDir
-elif [ "$COMMAND" = "portmap" ] ; then
- CLASS=org.apache.hadoop.portmap.Portmap
- HADOOP_OPTS="$HADOOP_OPTS $HADOOP_PORTMAP_OPTS"
-elif [ "$COMMAND" = "nfs3" ] ; then
- CLASS=org.apache.hadoop.hdfs.nfs.nfs3.Nfs3
- HADOOP_OPTS="$HADOOP_OPTS $HADOOP_NFS3_OPTS"
-elif [ "$COMMAND" = "cacheadmin" ] ; then
- CLASS=org.apache.hadoop.hdfs.tools.CacheAdmin
else
- CLASS="$COMMAND"
+ daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out"
+ daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}.pid"
fi
-export CLASSPATH=$CLASSPATH
-
-HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}"
-
-# Check to see if we should start a secure datanode
-if [ "$starting_secure_dn" = "true" ]; then
- if [ "$HADOOP_PID_DIR" = "" ]; then
- HADOOP_SECURE_DN_PID="/tmp/hadoop_secure_dn.pid"
+if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then
+ # shellcheck disable=SC2034
+ HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
+ if [[ -n "${secure_service}" ]]; then
+ # shellcheck disable=SC2034
+ HADOOP_LOGFILE="hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log"
else
- HADOOP_SECURE_DN_PID="$HADOOP_PID_DIR/hadoop_secure_dn.pid"
+ # shellcheck disable=SC2034
+ HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log"
fi
+fi
- JSVC=$JSVC_HOME/jsvc
- if [ ! -f $JSVC ]; then
- echo "JSVC_HOME is not set correctly so jsvc cannot be found. jsvc is required to run secure datanodes. "
- echo "Please download and install jsvc from http://archive.apache.org/dist/commons/daemon/binaries/ "\
- "and set JSVC_HOME to the directory containing the jsvc binary."
- exit
- fi
+hadoop_add_param HADOOP_OPTS Xmx "${JAVA_HEAP_MAX}"
+hadoop_finalize
- if [[ ! $JSVC_OUTFILE ]]; then
- JSVC_OUTFILE="$HADOOP_LOG_DIR/jsvc.out"
- fi
+export CLASSPATH
- if [[ ! $JSVC_ERRFILE ]]; then
- JSVC_ERRFILE="$HADOOP_LOG_DIR/jsvc.err"
- fi
-
- exec "$JSVC" \
- -Dproc_$COMMAND -outfile "$JSVC_OUTFILE" \
- -errfile "$JSVC_ERRFILE" \
- -pidfile "$HADOOP_SECURE_DN_PID" \
- -nodetach \
- -user "$HADOOP_SECURE_DN_USER" \
- -cp "$CLASSPATH" \
- $JAVA_HEAP_MAX $HADOOP_OPTS \
- org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter "$@"
-elif [ "$starting_privileged_nfs" = "true" ] ; then
- if [ "$HADOOP_PID_DIR" = "" ]; then
- HADOOP_PRIVILEGED_NFS_PID="/tmp/hadoop_privileged_nfs3.pid"
+if [[ -n "${daemon}" ]]; then
+ if [[ -n "${secure_service}" ]]; then
+ hadoop_secure_daemon_handler \
+ "${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}"\
+ "${daemon_pidfile}" "${daemon_outfile}" \
+ "${priv_pidfile}" "${priv_outfile}" "${priv_errfile}" "$@"
else
- HADOOP_PRIVILEGED_NFS_PID="$HADOOP_PID_DIR/hadoop_privileged_nfs3.pid"
+ hadoop_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}"\
+ "${daemon_pidfile}" "${daemon_outfile}" "$@"
fi
-
- JSVC=$JSVC_HOME/jsvc
- if [ ! -f $JSVC ]; then
- echo "JSVC_HOME is not set correctly so jsvc cannot be found. jsvc is required to run privileged NFS gateways. "
- echo "Please download and install jsvc from http://archive.apache.org/dist/commons/daemon/binaries/ "\
- "and set JSVC_HOME to the directory containing the jsvc binary."
- exit
- fi
-
- if [[ ! $JSVC_OUTFILE ]]; then
- JSVC_OUTFILE="$HADOOP_LOG_DIR/nfs3_jsvc.out"
- fi
-
- if [[ ! $JSVC_ERRFILE ]]; then
- JSVC_ERRFILE="$HADOOP_LOG_DIR/nfs3_jsvc.err"
- fi
-
- exec "$JSVC" \
- -Dproc_$COMMAND -outfile "$JSVC_OUTFILE" \
- -errfile "$JSVC_ERRFILE" \
- -pidfile "$HADOOP_PRIVILEGED_NFS_PID" \
- -nodetach \
- -user "$HADOOP_PRIVILEGED_NFS_USER" \
- -cp "$CLASSPATH" \
- $JAVA_HEAP_MAX $HADOOP_OPTS \
- org.apache.hadoop.hdfs.nfs.nfs3.PrivilegedNfsGatewayStarter "$@"
+ exit $?
else
- # run it
- exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"
+ # shellcheck disable=SC2086
+ hadoop_java_exec "${COMMAND}" "${CLASS}" "$@"
fi
-
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh
index 2aabf5300b..fb460d96d6 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh
@@ -18,19 +18,67 @@
# included in all the hdfs scripts with source command
# should not be executed directly
-bin=`which "$0"`
-bin=`dirname "${bin}"`
-bin=`cd "$bin"; pwd`
+function hadoop_subproject_init
+{
+ if [ -e "${HADOOP_CONF_DIR}/hdfs-env.sh" ]; then
+ . "${HADOOP_CONF_DIR}/hdfs-env.sh"
+ fi
+
+ # at some point in time, someone thought it would be a good idea to
+ # create separate vars for every subproject. *sigh*
+ # let's perform some overrides and set up some defaults for bw compat
+ # this way the common hadoop vars == subproject vars and can be
+ # used interchangeably from here on out
+ # ...
+ # this should get deprecated at some point.
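+ # e.g. (illustrative): with HADOOP_HDFS_LOG_DIR=/var/log/hdfs set and
+ # HADOOP_LOG_DIR unset, both variables equal /var/log/hdfs below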
+ HADOOP_LOG_DIR="${HADOOP_HDFS_LOG_DIR:-$HADOOP_LOG_DIR}"
+ HADOOP_HDFS_LOG_DIR="${HADOOP_LOG_DIR}"
+
+ HADOOP_LOGFILE="${HADOOP_HDFS_LOGFILE:-$HADOOP_LOGFILE}"
+ HADOOP_HDFS_LOGFILE="${HADOOP_LOGFILE}"
+
+ HADOOP_NICENESS=${HADOOP_HDFS_NICENESS:-$HADOOP_NICENESS}
+ HADOOP_HDFS_NICENESS="${HADOOP_NICENESS}"
+
+ HADOOP_STOP_TIMEOUT=${HADOOP_HDFS_STOP_TIMEOUT:-$HADOOP_STOP_TIMEOUT}
+ HADOOP_HDFS_STOP_TIMEOUT="${HADOOP_STOP_TIMEOUT}"
+
+ HADOOP_PID_DIR="${HADOOP_HDFS_PID_DIR:-$HADOOP_PID_DIR}"
+ HADOOP_HDFS_PID_DIR="${HADOOP_PID_DIR}"
+
+ HADOOP_ROOT_LOGGER=${HADOOP_HDFS_ROOT_LOGGER:-$HADOOP_ROOT_LOGGER}
+ HADOOP_HDFS_ROOT_LOGGER="${HADOOP_ROOT_LOGGER}"
+
+ HADOOP_HDFS_HOME="${HADOOP_HDFS_HOME:-$HADOOP_HOME_DIR}"
+
+ HADOOP_IDENT_STRING="${HADOOP_HDFS_IDENT_STRING:-$HADOOP_IDENT_STRING}"
+ HADOOP_HDFS_IDENT_STRING="${HADOOP_IDENT_STRING}"
+
+ # turn on the defaults
+
+ export HADOOP_NAMENODE_OPTS=${HADOOP_NAMENODE_OPTS:-"-Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender"}
+ export HADOOP_SECONDARYNAMENODE_OPTS=${HADOOP_SECONDARYNAMENODE_OPTS:-"-Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender"}
+ export HADOOP_DATANODE_OPTS=${HADOOP_DATANODE_OPTS:-"-Dhadoop.security.logger=ERROR,RFAS"}
+ export HADOOP_DN_SECURE_EXTRA_OPTS=${HADOOP_DN_SECURE_EXTRA_OPTS:-"-jvm server"}
+ export HADOOP_NFS3_SECURE_EXTRA_OPTS=${HADOOP_NFS3_SECURE_EXTRA_OPTS:-"-jvm server"}
+ export HADOOP_PORTMAP_OPTS=${HADOOP_PORTMAP_OPTS:-"-Xmx512m"}
+}
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then
- . ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh
-elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then
- . "$HADOOP_COMMON_HOME"/libexec/hadoop-config.sh
-elif [ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]; then
- . "$HADOOP_HOME"/libexec/hadoop-config.sh
-else
- echo "Hadoop common not found."
- exit
+if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then
+ _hd_this="${BASH_SOURCE-$0}"
+ HADOOP_LIBEXEC_DIR=$(cd -P -- "$(dirname -- "${_hd_this}")" >/dev/null && pwd -P)
fi
+
+if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then
+ . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
+elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then
+ . "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh"
+elif [ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]; then
+ . "${HADOOP_HOME}/libexec/hadoop-config.sh"
+else
+ echo "ERROR: Hadoop common not found." 2>&1
+ exit 1
+fi
+
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/refresh-namenodes.sh b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/refresh-namenodes.sh
index d3f67598b6..3bac3b4777 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/refresh-namenodes.sh
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/refresh-namenodes.sh
@@ -20,24 +20,40 @@
# This script refreshes all namenodes, it's a simple wrapper
# for dfsadmin to support multiple namenodes.
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
-
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
-
-namenodes=$("$HADOOP_PREFIX/bin/hdfs" getconf -nnRpcAddresses)
-if [ "$?" != '0' ] ; then errorFlag='1' ;
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+ DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
- for namenode in $namenodes ; do
- echo "Refreshing namenode [$namenode]"
- "$HADOOP_PREFIX/bin/hdfs" dfsadmin -fs hdfs://$namenode -refreshNodes
- if [ "$?" != '0' ] ; then errorFlag='1' ; fi
+ this="${BASH_SOURCE-$0}"
+ bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+ DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
+ . "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
+else
+ echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
+ exit 1
+fi
+
+namenodes=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -nnRpcAddresses)
+if [[ "$?" != '0' ]] ; then
+ errorFlag='1' ;
+else
+ for namenode in ${namenodes} ; do
+ echo "Refreshing namenode [${namenode}]"
+ "${HADOOP_HDFS_HOME}/bin/hdfs" dfsadmin \
+ -fs hdfs://${namenode} -refreshNodes
+ if [[ "$?" != '0' ]]; then
+ errorFlag='1'
+ fi
done
fi
-if [ "$errorFlag" = '1' ] ; then
+if [[ "${errorFlag}" = '1' ]] ; then
echo "Error: refresh of namenodes failed, see error messages above."
exit 1
else
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-balancer.sh b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-balancer.sh
index 2c14a59f8a..a116502fc7 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-balancer.sh
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-balancer.sh
@@ -15,13 +15,31 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
+function hadoop_usage
+{
+ echo "Usage: start-balancer.sh [--config confdir] [-policy <policy>] [-threshold <threshold>]"
+}
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+ DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+ DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
+ . "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
+else
+ echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
+ exit 1
+fi
# Start balancer daemon.
-"$HADOOP_PREFIX"/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script "$bin"/hdfs start balancer $@
+exec "${bin}/hadoop-daemon.sh" --config "${HADOOP_CONF_DIR}" start balancer "$@"
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-dfs.sh b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-dfs.sh
index 8cbea16aa6..5799dec876 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-dfs.sh
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-dfs.sh
@@ -20,98 +20,128 @@
# Optionally upgrade or rollback dfs state.
# Run this on master node.
-usage="Usage: start-dfs.sh [-upgrade|-rollback] [other options such as -clusterId]"
+function hadoop_usage
+{
+ echo "Usage: start-dfs.sh [-upgrade|-rollback] [-clusterId]"
+}
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+ DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+ DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
+ . "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
+else
+ echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
+ exit 1
+fi
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
# get arguments
-if [ $# -ge 1 ]; then
- nameStartOpt="$1"
- shift
- case "$nameStartOpt" in
- (-upgrade)
- ;;
- (-rollback)
- dataStartOpt="$nameStartOpt"
- ;;
- (*)
- echo $usage
- exit 1
- ;;
- esac
+if [[ $# -ge 1 ]]; then
+ nameStartOpt="$1"
+ shift
+ case "$nameStartOpt" in
+ -upgrade)
+ ;;
+ -rollback)
+ dataStartOpt="$nameStartOpt"
+ ;;
+ *)
+ hadoop_exit_with_usage 1
+ ;;
+ esac
fi
+
#Add other possible options
nameStartOpt="$nameStartOpt $@"
#---------------------------------------------------------
# namenodes
-NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -namenodes)
+NAMENODES=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -namenodes 2>/dev/null)
+
+if [[ -z "${NAMENODES}" ]]; then
+ NAMENODES=$(hostname)
+fi
echo "Starting namenodes on [$NAMENODES]"
-"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
- --config "$HADOOP_CONF_DIR" \
- --hostnames "$NAMENODES" \
- --script "$bin/hdfs" start namenode $nameStartOpt
+"${bin}/hadoop-daemons.sh" \
+--config "${HADOOP_CONF_DIR}" \
+--hostnames "${NAMENODES}" \
+start namenode ${nameStartOpt}
#---------------------------------------------------------
# datanodes (using default slaves file)
-if [ -n "$HADOOP_SECURE_DN_USER" ]; then
- echo \
- "Attempting to start secure cluster, skipping datanodes. " \
- "Run start-secure-dns.sh as root to complete startup."
+if [[ -n "${HADOOP_SECURE_DN_USER}" ]] &&
+[[ -z "${HADOOP_SECURE_COMMAND}" ]]; then
+ echo "ERROR: Attempting to start secure cluster, skipping datanodes. "
+ echo "Run start-secure-dns.sh as root or configure "
+ echo "\${HADOOP_SECURE_COMMAND} to complete startup."
else
- "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
- --config "$HADOOP_CONF_DIR" \
- --script "$bin/hdfs" start datanode $dataStartOpt
+
+ echo "Starting datanodes"
+
+ "${bin}/hadoop-daemons.sh" \
+ --config "${HADOOP_CONF_DIR}" \
+ start datanode ${dataStartOpt}
fi
#---------------------------------------------------------
# secondary namenodes (if any)
-SECONDARY_NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -secondarynamenodes 2>/dev/null)
+SECONDARY_NAMENODES=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -secondarynamenodes 2>/dev/null)
-if [ -n "$SECONDARY_NAMENODES" ]; then
- echo "Starting secondary namenodes [$SECONDARY_NAMENODES]"
+if [[ "${SECONDARY_NAMENODES}" == "0.0.0.0" ]]; then
+ SECONDARY_NAMENODES=$(hostname)
+fi
- "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
- --config "$HADOOP_CONF_DIR" \
- --hostnames "$SECONDARY_NAMENODES" \
- --script "$bin/hdfs" start secondarynamenode
+if [[ -n "${SECONDARY_NAMENODES}" ]]; then
+ echo "Starting secondary namenodes [${SECONDARY_NAMENODES}]"
+
+ "${bin}/hadoop-daemons.sh" \
+ --config "${HADOOP_CONF_DIR}" \
+ --hostnames "${SECONDARY_NAMENODES}" \
+ start secondarynamenode
fi
#---------------------------------------------------------
# quorumjournal nodes (if any)
-SHARED_EDITS_DIR=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.namenode.shared.edits.dir 2>&-)
+SHARED_EDITS_DIR=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -confKey dfs.namenode.shared.edits.dir 2>&-)
-case "$SHARED_EDITS_DIR" in
-qjournal://*)
- JOURNAL_NODES=$(echo "$SHARED_EDITS_DIR" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g')
- echo "Starting journal nodes [$JOURNAL_NODES]"
- "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
- --config "$HADOOP_CONF_DIR" \
- --hostnames "$JOURNAL_NODES" \
- --script "$bin/hdfs" start journalnode ;;
+case "${SHARED_EDITS_DIR}" in
+ qjournal://*)
+ JOURNAL_NODES=$(echo "${SHARED_EDITS_DIR}" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g')
+ echo "Starting journal nodes [${JOURNAL_NODES}]"
+ "${bin}/hadoop-daemons.sh" \
+ --config "${HADOOP_CONF_DIR}" \
+ --hostnames "${JOURNAL_NODES}" \
+ start journalnode
+ ;;
esac
#---------------------------------------------------------
# ZK Failover controllers, if auto-HA is enabled
-AUTOHA_ENABLED=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.ha.automatic-failover.enabled)
-if [ "$(echo "$AUTOHA_ENABLED" | tr A-Z a-z)" = "true" ]; then
- echo "Starting ZK Failover Controllers on NN hosts [$NAMENODES]"
- "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
- --config "$HADOOP_CONF_DIR" \
- --hostnames "$NAMENODES" \
- --script "$bin/hdfs" start zkfc
+AUTOHA_ENABLED=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -confKey dfs.ha.automatic-failover.enabled | tr '[:upper:]' '[:lower:]')
+if [[ "${AUTOHA_ENABLED}" = "true" ]]; then
+ echo "Starting ZK Failover Controllers on NN hosts [${NAMENODES}]"
+ "${bin}/hadoop-daemons.sh" \
+ --config "${HADOOP_CONF_DIR}" \
+ --hostnames "${NAMENODES}" \
+ start zkfc
fi
# eof
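
The sed expression used for the journal nodes strips the qjournal:// scheme,
splits the host list on ';', and drops the ports. A worked example with
illustrative hostnames:

    SHARED_EDITS_DIR='qjournal://jn1.example.com:8485;jn2.example.com:8485;jn3.example.com:8485/mycluster'
    echo "${SHARED_EDITS_DIR}" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g'
    # prints: jn1.example.com jn2.example.com jn3.example.com
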
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-secure-dns.sh b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-secure-dns.sh
index 7ddf687922..ab69cc237e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-secure-dns.sh
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-secure-dns.sh
@@ -17,17 +17,33 @@
# Run as root to start secure datanodes in a security-enabled cluster.
-usage="Usage (run as root in order to start secure datanodes): start-secure-dns.sh"
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
+function hadoop_usage {
+ echo "Usage: start-secure-dns.sh"
+}
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
-if [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then
- "$HADOOP_PREFIX"/sbin/hadoop-daemons.sh --config $HADOOP_CONF_DIR --script "$bin"/hdfs start datanode $dataStartOpt
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+ DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
- echo $usage
+ DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
+ . "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
+else
+ echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
+ exit 1
+fi
+
+if [[ "${EUID}" -eq 0 ]] && [[ -n "${HADOOP_SECURE_DN_USER}" ]]; then
+ exec "${bin}/hadoop-daemons.sh" --config "${HADOOP_CONF_DIR}" start datanode "${dataStartOpt}"
+else
+ hadoop_exit_with_usage 1
fi
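
Given the EUID and HADOOP_SECURE_DN_USER guards above, the script only does
real work when run roughly like this (user name illustrative):

    # as root, with the secure datanode user exported for the daemon wrapper
    sudo HADOOP_SECURE_DN_USER=hdfs sbin/start-secure-dns.sh
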
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-balancer.sh b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-balancer.sh
index df824560cc..718f867436 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-balancer.sh
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-balancer.sh
@@ -15,14 +15,32 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
+function hadoop_usage
+{
+ echo "Usage: stop-balancer.sh [--config confdir]"
+}
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+ DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+ DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
+ . "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
+else
+ echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
+ exit 1
+fi
# Stop balancer daemon.
# Run this on the machine where the balancer is running
-"$HADOOP_PREFIX"/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script "$bin"/hdfs stop balancer
+"${bin}/hadoop-daemon.sh" --config "${HADOOP_CONF_DIR}" stop balancer
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-dfs.sh b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-dfs.sh
index 6a622fae47..4f4d4f4017 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-dfs.sh
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-dfs.sh
@@ -15,75 +15,100 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
+function hadoop_usage
+{
+ echo "Usage: start-balancer.sh [--config confdir] [-policy ] [-threshold ]"
+}
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+ DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+ DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
+ . "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
+else
+ echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
+ exit 1
+fi
#---------------------------------------------------------
# namenodes
-NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -namenodes)
+NAMENODES=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -namenodes)
echo "Stopping namenodes on [$NAMENODES]"
-"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
- --config "$HADOOP_CONF_DIR" \
- --hostnames "$NAMENODES" \
- --script "$bin/hdfs" stop namenode
+"${bin}/hadoop-daemons.sh" \
+--config "${HADOOP_CONF_DIR}" \
+--hostnames "${NAMENODES}" \
+stop namenode
#---------------------------------------------------------
# datanodes (using default slaves file)
-if [ -n "$HADOOP_SECURE_DN_USER" ]; then
+if [[ -n "${HADOOP_SECURE_DN_USER}" ]] &&
+[[ -z "${HADOOP_SECURE_COMMAND}" ]]; then
echo \
- "Attempting to stop secure cluster, skipping datanodes. " \
- "Run stop-secure-dns.sh as root to complete shutdown."
+ "ERROR: Attempting to stop secure cluster, skipping datanodes. " \
+ "Run stop-secure-dns.sh as root to complete shutdown."
else
- "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
- --config "$HADOOP_CONF_DIR" \
- --script "$bin/hdfs" stop datanode
+
+ echo "Stopping datanodes"
+
+ "${bin}/hadoop-daemons.sh" --config "${HADOOP_CONF_DIR}" stop datanode
fi
#---------------------------------------------------------
# secondary namenodes (if any)
-SECONDARY_NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -secondarynamenodes 2>/dev/null)
+SECONDARY_NAMENODES=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -secondarynamenodes 2>/dev/null)
-if [ -n "$SECONDARY_NAMENODES" ]; then
- echo "Stopping secondary namenodes [$SECONDARY_NAMENODES]"
+if [[ "${SECONDARY_NAMENODES}" == "0.0.0.0" ]]; then
+ SECONDARY_NAMENODES=$(hostname)
+fi
- "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
- --config "$HADOOP_CONF_DIR" \
- --hostnames "$SECONDARY_NAMENODES" \
- --script "$bin/hdfs" stop secondarynamenode
+if [[ -n "${SECONDARY_NAMENODES}" ]]; then
+ echo "Stopping secondary namenodes [${SECONDARY_NAMENODES}]"
+
+ "${bin}/hadoop-daemons.sh" \
+ --config "${HADOOP_CONF_DIR}" \
+ --hostnames "${SECONDARY_NAMENODES}" \
+ stop secondarynamenode
fi
#---------------------------------------------------------
# quorumjournal nodes (if any)
-SHARED_EDITS_DIR=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.namenode.shared.edits.dir 2>&-)
+SHARED_EDITS_DIR=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -confKey dfs.namenode.shared.edits.dir 2>&-)
-case "$SHARED_EDITS_DIR" in
-qjournal://*)
- JOURNAL_NODES=$(echo "$SHARED_EDITS_DIR" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g')
- echo "Stopping journal nodes [$JOURNAL_NODES]"
- "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
- --config "$HADOOP_CONF_DIR" \
- --hostnames "$JOURNAL_NODES" \
- --script "$bin/hdfs" stop journalnode ;;
+case "${SHARED_EDITS_DIR}" in
+ qjournal://*)
+ JOURNAL_NODES=$(echo "${SHARED_EDITS_DIR}" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g')
+ echo "Stopping journal nodes [${JOURNAL_NODES}]"
+ "${bin}/hadoop-daemons.sh" \
+ --config "${HADOOP_CONF_DIR}" \
+ --hostnames "${JOURNAL_NODES}" \
+ stop journalnode
+ ;;
esac
#---------------------------------------------------------
# ZK Failover controllers, if auto-HA is enabled
-AUTOHA_ENABLED=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.ha.automatic-failover.enabled)
-if [ "$(echo "$AUTOHA_ENABLED" | tr A-Z a-z)" = "true" ]; then
- echo "Stopping ZK Failover Controllers on NN hosts [$NAMENODES]"
- "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
- --config "$HADOOP_CONF_DIR" \
- --hostnames "$NAMENODES" \
- --script "$bin/hdfs" stop zkfc
+AUTOHA_ENABLED=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -confKey dfs.ha.automatic-failover.enabled | tr '[:upper:]' '[:lower:]')
+if [[ "${AUTOHA_ENABLED}" = "true" ]]; then
+ echo "Stopping ZK Failover Controllers on NN hosts [${NAMENODES}]"
+ "${bin}/hadoop-daemons.sh" \
+ --config "${HADOOP_CONF_DIR}" \
+ --hostnames "${NAMENODES}" \
+ stop zkfc
fi
# eof
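
Both start-dfs.sh and stop-dfs.sh apply the same post-processing to the
secondary namenode lookup: with dfs.namenode.secondary.http-address left at
its default, getconf answers with the wildcard address, which is useless as an
ssh target, so the scripts substitute the local hostname:

    $ hdfs getconf -secondarynamenodes
    0.0.0.0
    # the scripts replace this with $(hostname) before calling
    # hadoop-daemons.sh --hostnames
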
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-secure-dns.sh b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-secure-dns.sh
index fdd47c3891..efce92edee 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-secure-dns.sh
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-secure-dns.sh
@@ -17,17 +17,33 @@
# Run as root to start secure datanodes in a security-enabled cluster.
-usage="Usage (run as root in order to stop secure datanodes): stop-secure-dns.sh"
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
+function hadoop_usage {
+ echo "Usage (run as root in order to stop secure datanodes): stop-secure-dns.sh"
+}
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
-if [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then
- "$HADOOP_PREFIX"/sbin/hadoop-daemons.sh --config $HADOOP_CONF_DIR --script "$bin"/hdfs stop datanode
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+ DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
- echo $usage
+ DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
+ . "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
+else
+ echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
+ exit 1
+fi
+
+if [[ "${EUID}" -eq 0 ]] && [[ -n "${HADOOP_SECURE_DN_USER}" ]]; then
+ "${bin}/hadoop-daemons.sh" --config "${HADOOP_CONF_DIR}" stop datanode
+else
+ hadoop_exit_with_usage 1
fi
diff --git a/hadoop-mapreduce-project/bin/mapred b/hadoop-mapreduce-project/bin/mapred
index b95c7cd4f9..340a95b8a3 100755
--- a/hadoop-mapreduce-project/bin/mapred
+++ b/hadoop-mapreduce-project/bin/mapred
@@ -15,138 +15,129 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-bin=`which $0`
-bin=`dirname ${bin}`
-bin=`cd "$bin" > /dev/null; pwd`
-
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-if [ -e ${HADOOP_LIBEXEC_DIR}/mapred-config.sh ]; then
- . ${HADOOP_LIBEXEC_DIR}/mapred-config.sh
-else
- . "$bin/mapred-config.sh"
-fi
-
-function print_usage(){
- echo "Usage: mapred [--config confdir] COMMAND"
+function hadoop_usage
+{
+ echo "Usage: mapred [--config confdir] [--daemon (start|stop|status)] COMMAND"
echo " where COMMAND is one of:"
- echo " pipes run a Pipes job"
- echo " job manipulate MapReduce jobs"
- echo " queue get information regarding JobQueues"
+
+ echo " archive -archiveName NAME -p * create a hadoop archive"
echo " classpath prints the class path needed for running"
echo " mapreduce subcommands"
- echo " historyserver run job history servers as a standalone daemon"
echo " distcp copy file or directories recursively"
- echo " archive -archiveName NAME -p * create a hadoop archive"
- echo " hsadmin job history server admin interface"
+ echo " job manipulate MapReduce jobs"
+ echo " historyserver run job history servers as a standalone daemon"
+ echo " pipes run a Pipes job"
+ echo " queue get information regarding JobQueues"
+ echo " sampler sampler"
echo ""
echo "Most commands print help when invoked w/o parameters."
}
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+ DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+ DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/mapred-config.sh" ]]; then
+ . "${HADOOP_LIBEXEC_DIR}/mapred-config.sh"
+else
+ echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/mapred-config.sh." 2>&1
+ exit 1
+fi
+
+
if [ $# = 0 ]; then
- print_usage
- exit
+ hadoop_exit_with_usage 1
fi
COMMAND=$1
shift
-case $COMMAND in
- # usage flags
- --help|-help|-h)
- print_usage
- exit
- ;;
+case ${COMMAND} in
+ mradmin|jobtracker|tasktracker|groups)
+ echo "Sorry, the ${COMMAND} command is no longer supported."
+ echo "You may find similar functionality with the \"yarn\" shell command."
+ hadoop_exit_with_usage 1
+ ;;
+ archive)
+ CLASS=org.apache.hadoop.tools.HadoopArchives
+ hadoop_add_classpath "${TOOL_PATH}"
+ HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
+ ;;
+ classpath)
+ hadoop_finalize
+ echo "${CLASSPATH}"
+ exit 0
+ ;;
+ distcp)
+ CLASS=org.apache.hadoop.tools.DistCp
+ hadoop_add_classpath "${TOOL_PATH}"
+ HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
+ ;;
+ historyserver)
+ daemon="true"
+ CLASS=org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer
+ HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_JOB_HISTORYSERVER_OPTS}"
+ if [ -n "${HADOOP_JOB_HISTORYSERVER_HEAPSIZE}" ]; then
+ JAVA_HEAP_MAX="-Xmx${HADOOP_JOB_HISTORYSERVER_HEAPSIZE}m"
+ fi
+ HADOOP_DAEMON_ROOT_LOGGER=${HADOOP_JHS_LOGGER:-$HADOOP_DAEMON_ROOT_LOGGER}
+ ;;
+ job)
+ CLASS=org.apache.hadoop.mapred.JobClient
+ ;;
+ pipes)
+ CLASS=org.apache.hadoop.mapred.pipes.Submitter
+ HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
+ ;;
+ queue)
+ CLASS=org.apache.hadoop.mapred.JobQueueClient
+ ;;
+ sampler)
+ CLASS=org.apache.hadoop.mapred.lib.InputSampler
+ HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
+ ;;
+ -*|*)
+ hadoop_exit_with_usage 1
+ ;;
esac
-if [ "$COMMAND" = "job" ] ; then
- CLASS=org.apache.hadoop.mapred.JobClient
- HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-elif [ "$COMMAND" = "queue" ] ; then
- CLASS=org.apache.hadoop.mapred.JobQueueClient
- HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-elif [ "$COMMAND" = "pipes" ] ; then
- CLASS=org.apache.hadoop.mapred.pipes.Submitter
- HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-elif [ "$COMMAND" = "sampler" ] ; then
- CLASS=org.apache.hadoop.mapred.lib.InputSampler
- HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-elif [ "$COMMAND" = "classpath" ] ; then
- echo -n
-elif [ "$COMMAND" = "historyserver" ] ; then
- CLASS=org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer
- HADOOP_OPTS="$HADOOP_OPTS -Dmapred.jobsummary.logger=${HADOOP_JHS_LOGGER:-INFO,console} $HADOOP_JOB_HISTORYSERVER_OPTS"
- if [ "$HADOOP_JOB_HISTORYSERVER_HEAPSIZE" != "" ]; then
- JAVA_HEAP_MAX="-Xmx""$HADOOP_JOB_HISTORYSERVER_HEAPSIZE""m"
- fi
-elif [ "$COMMAND" = "mradmin" ] \
- || [ "$COMMAND" = "jobtracker" ] \
- || [ "$COMMAND" = "tasktracker" ] \
- || [ "$COMMAND" = "groups" ] ; then
- echo "Sorry, the $COMMAND command is no longer supported."
- echo "You may find similar functionality with the \"yarn\" shell command."
- print_usage
- exit 1
-elif [ "$COMMAND" = "distcp" ] ; then
- CLASS=org.apache.hadoop.tools.DistCp
- CLASSPATH=${CLASSPATH}:${TOOL_PATH}
- HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-elif [ "$COMMAND" = "archive" ] ; then
- CLASS=org.apache.hadoop.tools.HadoopArchives
- CLASSPATH=${CLASSPATH}:${TOOL_PATH}
- HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-elif [ "$COMMAND" = "hsadmin" ] ; then
- CLASS=org.apache.hadoop.mapreduce.v2.hs.client.HSAdmin
- HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-else
- echo $COMMAND - invalid command
- print_usage
- exit 1
+daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out"
+daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}.pid"
+
+
+if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then
+ # shellcheck disable=SC2034
+ HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
+ hadoop_add_param HADOOP_OPTS mapred.jobsummary.logger "-Dmapred.jobsummary.logger=${HADOOP_ROOT_LOGGER}"
+ # shellcheck disable=SC2034
+ HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log"
fi
-# for developers, add mapred classes to CLASSPATH
-if [ -d "$HADOOP_MAPRED_HOME/build/classes" ]; then
- CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build/classes
-fi
-if [ -d "$HADOOP_MAPRED_HOME/build/webapps" ]; then
- CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build
-fi
-if [ -d "$HADOOP_MAPRED_HOME/build/test/classes" ]; then
- CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build/test/classes
-fi
-if [ -d "$HADOOP_MAPRED_HOME/build/tools" ]; then
- CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build/tools
-fi
-
-# for releases, add core mapred jar & webapps to CLASSPATH
-if [ -d "$HADOOP_PREFIX/${MAPRED_DIR}/webapps" ]; then
- CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/${MAPRED_DIR}
-fi
-for f in $HADOOP_MAPRED_HOME/${MAPRED_DIR}/*.jar; do
- CLASSPATH=${CLASSPATH}:$f;
-done
-
-# Need YARN jars also
-for f in $HADOOP_YARN_HOME/${YARN_DIR}/*.jar; do
- CLASSPATH=${CLASSPATH}:$f;
-done
-
-# add libs to CLASSPATH
-for f in $HADOOP_MAPRED_HOME/${MAPRED_LIB_JARS_DIR}/*.jar; do
- CLASSPATH=${CLASSPATH}:$f;
-done
-
-# add modules to CLASSPATH
-for f in $HADOOP_MAPRED_HOME/modules/*.jar; do
- CLASSPATH=${CLASSPATH}:$f;
-done
-
-if [ "$COMMAND" = "classpath" ] ; then
- echo $CLASSPATH
- exit
-fi
-
-HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}"
+hadoop_add_param HADOOP_OPTS Xmx "${JAVA_HEAP_MAX}"
+hadoop_finalize
export CLASSPATH
-exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"
+
+if [[ -n "${daemon}" ]]; then
+ if [[ -n "${secure_service}" ]]; then
+ hadoop_secure_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}"\
+ "${CLASS}" "${daemon_pidfile}" "${daemon_outfile}" \
+ "${priv_pidfile}" "${priv_outfile}" "${priv_errfile}" "$@"
+ else
+ hadoop_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}" \
+ "${daemon_pidfile}" "${daemon_outfile}" "$@"
+ fi
+ exit $?
+else
+ hadoop_java_exec "${COMMAND}" "${CLASS}" "$@"
+fi
+
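
Several of the new branches call hadoop_add_param from hadoop-functions.sh
(introduced elsewhere in this patch). A simplified sketch of its assumed
de-duplication behavior: append a value only when the keyword is absent.

    # simplified model of: hadoop_add_param VAR keyword value (assumed semantics)
    add_param_sketch() {
      local var=$1 keyword=$2 value=$3
      if [[ "${!var}" != *"${keyword}"* ]]; then
        eval "${var}=\"\${${var}} ${value}\""
      fi
    }
    HADOOP_OPTS=""
    add_param_sketch HADOOP_OPTS Xmx "-Xmx1000m"  # appended
    add_param_sketch HADOOP_OPTS Xmx "-Xmx2000m"  # skipped: Xmx already present
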
diff --git a/hadoop-mapreduce-project/bin/mapred-config.sh b/hadoop-mapreduce-project/bin/mapred-config.sh
index 254e0a0f43..c24d3509c4 100644
--- a/hadoop-mapreduce-project/bin/mapred-config.sh
+++ b/hadoop-mapreduce-project/bin/mapred-config.sh
@@ -18,35 +18,55 @@
# included in all the mapred scripts with source command
# should not be executed directly
-bin=`which "$0"`
-bin=`dirname "${bin}"`
-bin=`cd "$bin"; pwd`
+function hadoop_subproject_init
+{
+ if [ -e "${HADOOP_CONF_DIR}/mapred-env.sh" ]; then
+ . "${HADOOP_CONF_DIR}/mapred-env.sh"
+ fi
+
+ # at some point in time, someone thought it would be a good idea to
+ # create separate vars for every subproject. *sigh*
+ # let's perform some overrides and setup some defaults for bw compat
+ # this way the common hadoop var's == subproject vars and can be
+ # used interchangeable from here on out
+ # ...
+ # this should get deprecated at some point.
+ HADOOP_LOG_DIR="${HADOOP_MAPRED_LOG_DIR:-$HADOOP_LOG_DIR}"
+ HADOOP_MAPRED_LOG_DIR="${HADOOP_LOG_DIR}"
+
+ HADOOP_LOGFILE="${HADOOP_MAPRED_LOGFILE:-$HADOOP_LOGFILE}"
+ HADOOP_MAPRED_LOGFILE="${HADOOP_LOGFILE}"
+
+ HADOOP_NICENESS="${HADOOP_MAPRED_NICENESS:-$HADOOP_NICENESS}"
+ HADOOP_MAPRED_NICENESS="${HADOOP_NICENESS}"
+
+ HADOOP_STOP_TIMEOUT="${HADOOP_MAPRED_STOP_TIMEOUT:-$HADOOP_STOP_TIMEOUT}"
+ HADOOP_MAPRED_STOP_TIMEOUT="${HADOOP_STOP_TIMEOUT}"
+
+ HADOOP_PID_DIR="${HADOOP_MAPRED_PID_DIR:-$HADOOP_PID_DIR}"
+ HADOOP_MAPRED_PID_DIR="${HADOOP_PID_DIR}"
+
+ HADOOP_ROOT_LOGGER="${HADOOP_MAPRED_ROOT_LOGGER:-INFO,console}"
+ HADOOP_MAPRED_ROOT_LOGGER="${HADOOP_ROOT_LOGGER}"
+
+ HADOOP_MAPRED_HOME="${HADOOP_MAPRED_HOME:-$HADOOP_HOME_DIR}"
+
+ HADOOP_IDENT_STRING="${HADOOP_MAPRED_IDENT_STRING:-$HADOOP_IDENT_STRING}"
+ HADOOP_MAPRED_IDENT_STRING="${HADOOP_IDENT_STRING}"
+}
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then
+if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then
+ _mc_this="${BASH_SOURCE-$0}"
+ HADOOP_LIBEXEC_DIR=$(cd -P -- "$(dirname -- "${_mc_this}")" >/dev/null && pwd -P)
+fi
+
+if [[ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
-elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then
- . "$HADOOP_COMMON_HOME"/libexec/hadoop-config.sh
-elif [ -e "${HADOOP_COMMON_HOME}/bin/hadoop-config.sh" ]; then
- . "$HADOOP_COMMON_HOME"/bin/hadoop-config.sh
-elif [ -e "${HADOOP_HOME}/bin/hadoop-config.sh" ]; then
- . "$HADOOP_HOME"/bin/hadoop-config.sh
-elif [ -e "${HADOOP_MAPRED_HOME}/bin/hadoop-config.sh" ]; then
- . "$HADOOP_MAPRED_HOME"/bin/hadoop-config.sh
+elif [[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]]; then
+ . "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh"
+elif [[ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]]; then
+ . "${HADOOP_HOME}/libexec/hadoop-config.sh"
else
echo "Hadoop common not found."
exit
fi
-
-# Only set locally to use in HADOOP_OPTS. No need to export.
-# The following defaults are useful when somebody directly invokes bin/mapred.
-HADOOP_MAPRED_LOG_DIR=${HADOOP_MAPRED_LOG_DIR:-${HADOOP_MAPRED_HOME}/logs}
-HADOOP_MAPRED_LOGFILE=${HADOOP_MAPRED_LOGFILE:-hadoop.log}
-HADOOP_MAPRED_ROOT_LOGGER=${HADOOP_MAPRED_ROOT_LOGGER:-INFO,console}
-
-HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_MAPRED_LOG_DIR"
-HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.file=$HADOOP_MAPRED_LOGFILE"
-export HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.root.logger=${HADOOP_MAPRED_ROOT_LOGGER}"
-
-
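
The override dance in hadoop_subproject_init is symmetric: whichever variable
of each pair is set wins, after which both names carry the same value. For
example:

    # user sets only the subproject-specific variable...
    export HADOOP_MAPRED_LOG_DIR=/var/log/mapred
    # ...after hadoop_subproject_init both names agree:
    #   HADOOP_LOG_DIR=/var/log/mapred
    #   HADOOP_MAPRED_LOG_DIR=/var/log/mapred
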
diff --git a/hadoop-mapreduce-project/bin/mr-jobhistory-daemon.sh b/hadoop-mapreduce-project/bin/mr-jobhistory-daemon.sh
index 7585c9a81e..7f6e6f1db6 100644
--- a/hadoop-mapreduce-project/bin/mr-jobhistory-daemon.sh
+++ b/hadoop-mapreduce-project/bin/mr-jobhistory-daemon.sh
@@ -15,133 +15,32 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+function hadoop_usage
+{
+ echo "Usage: mr-jobhistory-daemon.sh [--config confdir] (start|stop|status) "
+}
-#
-# Environment Variables
-#
-# HADOOP_JHS_LOGGER Hadoop JobSummary logger.
-# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_MAPRED_HOME}/conf.
-# HADOOP_MAPRED_PID_DIR The pid files are stored. /tmp by default.
-# HADOOP_MAPRED_NICENESS The scheduling priority for daemons. Defaults to 0.
-##
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+ DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+ this="${BASH_SOURCE-$0}"
+ bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+ DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
-usage="Usage: mr-jobhistory-daemon.sh [--config ] (start|stop) "
-
-# if no args specified, show usage
-if [ $# -le 1 ]; then
- echo $usage
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
+ . "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
+else
+ echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
exit 1
fi
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
-
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-if [ -e ${HADOOP_LIBEXEC_DIR}/mapred-config.sh ]; then
- . $HADOOP_LIBEXEC_DIR/mapred-config.sh
-fi
-
-# get arguments
-startStop=$1
-shift
-command=$1
+daemonmode=$1
shift
-hadoop_rotate_log ()
-{
- log=$1;
- num=5;
- if [ -n "$2" ]; then
- num=$2
- fi
- if [ -f "$log" ]; then # rotate logs
- while [ $num -gt 1 ]; do
- prev=`expr $num - 1`
- [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
- num=$prev
- done
- mv "$log" "$log.$num";
- fi
-}
-
-if [ "$HADOOP_MAPRED_IDENT_STRING" = "" ]; then
- export HADOOP_MAPRED_IDENT_STRING="$USER"
-fi
-
-export HADOOP_MAPRED_HOME=${HADOOP_MAPRED_HOME:-${HADOOP_PREFIX}}
-export HADOOP_MAPRED_LOGFILE=mapred-$HADOOP_MAPRED_IDENT_STRING-$command-$HOSTNAME.log
-export HADOOP_MAPRED_ROOT_LOGGER=${HADOOP_MAPRED_ROOT_LOGGER:-INFO,RFA}
-export HADOOP_JHS_LOGGER=${HADOOP_JHS_LOGGER:-INFO,JSA}
-
-if [ -f "${HADOOP_CONF_DIR}/mapred-env.sh" ]; then
- . "${HADOOP_CONF_DIR}/mapred-env.sh"
-fi
-
-mkdir -p "$HADOOP_MAPRED_LOG_DIR"
-chown $HADOOP_MAPRED_IDENT_STRING $HADOOP_MAPRED_LOG_DIR
-
-if [ "$HADOOP_MAPRED_PID_DIR" = "" ]; then
- HADOOP_MAPRED_PID_DIR=/tmp
-fi
-
-HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_MAPRED_IDENT_STRING"
-
-log=$HADOOP_MAPRED_LOG_DIR/mapred-$HADOOP_MAPRED_IDENT_STRING-$command-$HOSTNAME.out
-pid=$HADOOP_MAPRED_PID_DIR/mapred-$HADOOP_MAPRED_IDENT_STRING-$command.pid
-
-HADOOP_MAPRED_STOP_TIMEOUT=${HADOOP_MAPRED_STOP_TIMEOUT:-5}
-
-# Set default scheduling priority
-if [ "$HADOOP_MAPRED_NICENESS" = "" ]; then
- export HADOOP_MAPRED_NICENESS=0
-fi
-
-case $startStop in
-
- (start)
-
- mkdir -p "$HADOOP_MAPRED_PID_DIR"
-
- if [ -f $pid ]; then
- if kill -0 `cat $pid` > /dev/null 2>&1; then
- echo $command running as process `cat $pid`. Stop it first.
- exit 1
- fi
- fi
-
- hadoop_rotate_log $log
- echo starting $command, logging to $log
- cd "$HADOOP_MAPRED_HOME"
- nohup nice -n $HADOOP_MAPRED_NICENESS "$HADOOP_MAPRED_HOME"/bin/mapred --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
- echo $! > $pid
- sleep 1; head "$log"
- ;;
-
- (stop)
-
- if [ -f $pid ]; then
- TARGET_PID=`cat $pid`
- if kill -0 $TARGET_PID > /dev/null 2>&1; then
- echo stopping $command
- kill $TARGET_PID
- sleep $HADOOP_MAPRED_STOP_TIMEOUT
- if kill -0 $TARGET_PID > /dev/null 2>&1; then
- echo "$command did not stop gracefully after $HADOOP_MAPRED_STOP_TIMEOUT seconds: killing with kill -9"
- kill -9 $TARGET_PID
- fi
- else
- echo no $command to stop
- fi
- rm -f $pid
- else
- echo no $command to stop
- fi
- ;;
-
- (*)
- echo $usage
- exit 1
- ;;
-
-esac
+exec "${HADOOP_MAPRED_HOME}/bin/mapred" \
+--config "${HADOOP_CONF_DIR}" --daemon "${daemonmode}" "$@"
diff --git a/hadoop-mapreduce-project/conf/mapred-env.sh b/hadoop-mapreduce-project/conf/mapred-env.sh
index 6be1e27966..6c417a3a0f 100644
--- a/hadoop-mapreduce-project/conf/mapred-env.sh
+++ b/hadoop-mapreduce-project/conf/mapred-env.sh
@@ -13,15 +13,59 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# export JAVA_HOME=/home/y/libexec/jdk1.6.0/
+##
+## THIS FILE ACTS AS AN OVERRIDE FOR hadoop-env.sh FOR ALL
+## WORK DONE BY THE mapred AND RELATED COMMANDS.
+##
+## Precedence rules:
+##
+## mapred-env.sh > hadoop-env.sh > hard-coded defaults
+##
+## MAPRED_xyz > HADOOP_xyz > hard-coded defaults
+##
-export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000
+###
+# Generic settings for MapReduce
+###
-export HADOOP_MAPRED_ROOT_LOGGER=INFO,RFA
+# Override the log4j settings for all MR apps
+# export MAPRED_ROOT_LOGGER="INFO,console"
+# Override Hadoop's log directory & file
+# export HADOOP_MAPRED_LOG_DIR=""
+
+# Override Hadoop's pid directory
+# export HADOOP_MAPRED_PID_DIR=
+
+# Override Hadoop's identity string. $USER by default.
+# This is used in writing log and pid files, so keep that in mind!
+# export HADOOP_MAPRED_IDENT_STRING=$USER
+
+# Override Hadoop's process priority
+# Note that sub-processes will also run at this level!
+# export HADOOP_MAPRED_NICENESS=0
+
+###
+# Job History Server specific parameters
+###
+
+# Specify the max heapsize for the Job History Server using a numerical value
+# in the scale of MB. For example, to specify a JVM option of -Xmx1000m, set
+# the value to 1000.
+# This value will be overridden by an Xmx setting specified in
+# MAPRED_OPTS, HADOOP_OPTS, or HADOOP_JOB_HISTORYSERVER_OPTS.
+# If not specified, the default value will be picked from either YARN_HEAPMAX
+# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
+#
+#export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000
+
+# Specify the JVM options to be used when starting the Job History Server.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
#export HADOOP_JOB_HISTORYSERVER_OPTS=
-#export HADOOP_MAPRED_LOG_DIR="" # Where log files are stored. $HADOOP_MAPRED_HOME/logs by default.
-#export HADOOP_JHS_LOGGER=INFO,RFA # Hadoop JobSummary logger.
-#export HADOOP_MAPRED_PID_DIR= # The pid files are stored. /tmp by default.
-#export HADOOP_MAPRED_IDENT_STRING= #A string representing this instance of hadoop. $USER by default
-#export HADOOP_MAPRED_NICENESS= #The scheduling priority for daemons. Defaults to 0.
+
+# Specify the log4j settings for the JobHistoryServer
+#export HADOOP_JHS_LOGGER=INFO,RFA
+
diff --git a/hadoop-yarn-project/hadoop-yarn/bin/slaves.sh b/hadoop-yarn-project/hadoop-yarn/bin/slaves.sh
deleted file mode 100644
index 9b783b4f32..0000000000
--- a/hadoop-yarn-project/hadoop-yarn/bin/slaves.sh
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/usr/bin/env bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Run a shell command on all slave hosts.
-#
-# Environment Variables
-#
-# YARN_SLAVES File naming remote hosts.
-# Default is ${YARN_CONF_DIR}/slaves.
-# YARN_CONF_DIR Alternate conf dir. Default is ${HADOOP_YARN_HOME}/conf.
-# YARN_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
-# YARN_SSH_OPTS Options passed to ssh when running remote commands.
-##
-
-usage="Usage: slaves.sh [--config confdir] command..."
-
-# if no args specified, show usage
-if [ $# -le 0 ]; then
- echo $usage
- exit 1
-fi
-
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
-
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/yarn-config.sh
-
-# If the slaves file is specified in the command line,
-# then it takes precedence over the definition in
-# yarn-env.sh. Save it here.
-HOSTLIST=$YARN_SLAVES
-
-if [ -f "${YARN_CONF_DIR}/yarn-env.sh" ]; then
- . "${YARN_CONF_DIR}/yarn-env.sh"
-fi
-
-if [ "$HOSTLIST" = "" ]; then
- if [ "$YARN_SLAVES" = "" ]; then
- export HOSTLIST="${YARN_CONF_DIR}/slaves"
- else
- export HOSTLIST="${YARN_SLAVES}"
- fi
-fi
-
-for slave in `cat "$HOSTLIST"|sed "s/#.*$//;/^$/d"`; do
- ssh $YARN_SSH_OPTS $slave $"${@// /\\ }" \
- 2>&1 | sed "s/^/$slave: /" &
- if [ "$YARN_SLAVE_SLEEP" != "" ]; then
- sleep $YARN_SLAVE_SLEEP
- fi
-done
-
-wait
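
The deleted slaves.sh read its host list through sed "s/#.*$//;/^$/d", which
strips trailing comments and drops blank or comment-only lines. Equivalent
filtering against an illustrative slaves file:

    printf '%s\n' 'node1.example.com' '# spare' 'node2.example.com # rack2' '' \
      | sed 's/#.*$//;/^$/d'
    # prints node1.example.com and node2.example.com (comment text removed)
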
diff --git a/hadoop-yarn-project/hadoop-yarn/bin/start-yarn.sh b/hadoop-yarn-project/hadoop-yarn/bin/start-yarn.sh
index 40b77fb35a..6803601b51 100644
--- a/hadoop-yarn-project/hadoop-yarn/bin/start-yarn.sh
+++ b/hadoop-yarn-project/hadoop-yarn/bin/start-yarn.sh
@@ -16,20 +16,34 @@
# limitations under the License.
-# Start all yarn daemons. Run this on master node.
+function hadoop_usage
+{
+ echo "Usage: start-yarn.sh [--config confdir]"
+}
-echo "starting yarn daemons"
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+ DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+ DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/yarn-config.sh
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
+ . "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
+else
+ echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
+ exit 1
+fi
# start resourceManager
-"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR start resourcemanager
+"${bin}/yarn-daemon.sh" --config "${YARN_CONF_DIR}" start resourcemanager
# start nodeManager
-"$bin"/yarn-daemons.sh --config $YARN_CONF_DIR start nodemanager
+"${bin}/yarn-daemons.sh" --config "${YARN_CONF_DIR}" start nodemanager
# start proxyserver
-#"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR start proxyserver
+#"${bin}/yarn-daemon.sh" --config "${YARN_CONF_DIR}" start proxyserver
diff --git a/hadoop-yarn-project/hadoop-yarn/bin/stop-yarn.sh b/hadoop-yarn-project/hadoop-yarn/bin/stop-yarn.sh
index a8498ef3ff..605971b765 100644
--- a/hadoop-yarn-project/hadoop-yarn/bin/stop-yarn.sh
+++ b/hadoop-yarn-project/hadoop-yarn/bin/stop-yarn.sh
@@ -18,18 +18,34 @@
# Stop all yarn daemons. Run this on master node.
-echo "stopping yarn daemons"
+function hadoop_usage
+{
+ echo "Usage: stop-yarn.sh [--config confdir]"
+}
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/yarn-config.sh
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+ DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+ DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
-# stop resourceManager
-"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR stop resourcemanager
-# stop nodeManager
-"$bin"/yarn-daemons.sh --config $YARN_CONF_DIR stop nodemanager
-# stop proxy server
-"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR stop proxyserver
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
+ . "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
+else
+ echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
+ exit 1
+fi
+
+# stop resourceManager
+"${bin}/yarn-daemon.sh" --config "${YARN_CONF_DIR}" stop resourcemanager
+# stop nodeManager
+"${bin}/yarn-daemons.sh" --config "${YARN_CONF_DIR}" stop nodemanager
+# stop proxyserver
+#"${bin}/yarn-daemon.sh" --config "${YARN_CONF_DIR}" stop proxyserver
diff --git a/hadoop-yarn-project/hadoop-yarn/bin/yarn b/hadoop-yarn-project/hadoop-yarn/bin/yarn
index 200ab27d38..2017d57fea 100644
--- a/hadoop-yarn-project/hadoop-yarn/bin/yarn
+++ b/hadoop-yarn-project/hadoop-yarn/bin/yarn
@@ -15,266 +15,182 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-
-# The Hadoop command script
-#
-# Environment Variables
-#
-# JAVA_HOME The java implementation to use. Overrides JAVA_HOME.
-#
-# YARN_USER_CLASSPATH Additional user CLASSPATH entries.
-#
-# YARN_USER_CLASSPATH_FIRST If set to non empty value then the user classpath
-# specified in YARN_USER_CLASSPATH will be
-# appended at the beginning of YARN's final
-# classpath instead of at the end.
-#
-# YARN_HEAPSIZE The maximum amount of heap to use, in MB.
-# Default is 1000.
-#
-# YARN_{COMMAND}_HEAPSIZE overrides YARN_HEAPSIZE for a given command
-# eg YARN_NODEMANAGER_HEAPSIZE sets the heap
-# size for the NodeManager. If you set the
-# heap size in YARN_{COMMAND}_OPTS or YARN_OPTS
-# they take precedence.
-#
-# YARN_OPTS Extra Java runtime options.
-#
-# YARN_CLIENT_OPTS when the respective command is run.
-# YARN_{COMMAND}_OPTS etc YARN_NODEMANAGER_OPTS applies to NodeManager
-# for e.g. YARN_CLIENT_OPTS applies to
-# more than one command (fs, dfs, fsck,
-# dfsadmin etc)
-#
-# YARN_CONF_DIR Alternate conf dir. Default is ${HADOOP_YARN_HOME}/conf.
-#
-# YARN_ROOT_LOGGER The root appender. Default is INFO,console
-#
-
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin" > /dev/null; pwd`
-
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/yarn-config.sh
-
-function print_usage(){
- echo "Usage: yarn [--config confdir] COMMAND"
+function hadoop_usage
+{
+ echo "Usage: yarn [--config confdir] [--daemon (start|stop|status)] COMMAND"
echo "where COMMAND is one of:"
- echo " resourcemanager -format-state-store deletes the RMStateStore"
- echo " resourcemanager run the ResourceManager"
- echo " nodemanager run a nodemanager on each slave"
- echo " timelineserver run the timeline server"
- echo " rmadmin admin tools"
- echo " version print the version"
- echo " jar run a jar file"
- echo " application prints application(s)"
- echo " report/kill application"
- echo " applicationattempt prints applicationattempt(s)"
- echo " report"
+ echo " application prints application(s) report/kill application"
+ echo " applicationattempt prints applicationattempt(s) report"
+ echo " classpath prints the class path needed to get the"
+ echo " Hadoop jar and the required libraries"
echo " container prints container(s) report"
- echo " node prints node report(s)"
+ echo " daemonlog get/set the log level for each daemon"
+ echo " jar run a jar file"
echo " logs dump container logs"
- echo " classpath prints the class path needed to"
- echo " get the Hadoop jar and the"
- echo " required libraries"
- echo " daemonlog get/set the log level for each"
- echo " daemon"
+ echo " node prints node report(s)"
+ echo " nodemanager run a nodemanager on each slave"
+ echo " proxyserver run the web app proxy server"
+ echo " resourcemanager run the ResourceManager"
+ echo " resourcemanager -format-state-store deletes the RMStateStore"
+ echo " rmadmin admin tools"
+ echo " timelineserver run the timeline server"
+ echo " version print the version"
echo " or"
echo " CLASSNAME run the class named CLASSNAME"
echo "Most commands print help when invoked w/o parameters."
}
-# if no args specified, show usage
-if [ $# = 0 ]; then
- print_usage
+
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+ DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+ this="${BASH_SOURCE-$0}"
+ bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+ DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
+ . "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
+else
+ echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
exit 1
fi
+# if no args specified, show usage
+if [[ $# = 0 ]]; then
+ hadoop_exit_with_usage 1
+fi
+
# get arguments
COMMAND=$1
shift
-case $COMMAND in
- # usage flags
- --help|-help|-h)
- print_usage
+case "${COMMAND}" in
+ application|applicationattempt|container)
+ CLASS=org.apache.hadoop.yarn.client.cli.ApplicationCLI
+ YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
+ ;;
+ classpath)
+ hadoop_finalize
+ echo "${CLASSPATH}"
exit
- ;;
+ ;;
+ daemonlog)
+ CLASS=org.apache.hadoop.log.LogLevel
+ YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
+ ;;
+ jar)
+ CLASS=org.apache.hadoop.util.RunJar
+ YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
+ ;;
+ historyserver)
+ daemon="true"
+ echo "DEPRECATED: Use of this command to start the timeline server is deprecated." 1>&2
+ echo "Instead use the timelineserver command for it." 1>&2
+ echo "Starting the History Server anyway..." 1>&2
+ CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
+ ;;
+ logs)
+ CLASS=org.apache.hadoop.yarn.logaggregation.LogDumper
+ YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
+ ;;
+ node)
+ CLASS=org.apache.hadoop.yarn.client.cli.NodeCLI
+ YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
+ ;;
+ nodemanager)
+ daemon="true"
+ CLASS='org.apache.hadoop.yarn.server.nodemanager.NodeManager'
+ YARN_OPTS="${YARN_OPTS} ${YARN_NODEMANAGER_OPTS}"
+ if [[ -n "${YARN_NODEMANAGER_HEAPSIZE}" ]]; then
+ JAVA_HEAP_MAX="-Xmx${YARN_NODEMANAGER_HEAPSIZE}m"
+ fi
+ ;;
+ proxyserver)
+ daemon="true"
+ CLASS='org.apache.hadoop.yarn.server.webproxy.WebAppProxyServer'
+ YARN_OPTS="${YARN_OPTS} ${YARN_PROXYSERVER_OPTS}"
+ if [[ -n "${YARN_PROXYSERVER_HEAPSIZE}" ]]; then
+ JAVA_HEAP_MAX="-Xmx${YARN_PROXYSERVER_HEAPSIZE}m"
+ fi
+ ;;
+ resourcemanager)
+ daemon="true"
+ CLASS='org.apache.hadoop.yarn.server.resourcemanager.ResourceManager'
+ YARN_OPTS="${YARN_OPTS} ${YARN_RESOURCEMANAGER_OPTS}"
+ if [[ -n "${YARN_RESOURCEMANAGER_HEAPSIZE}" ]]; then
+ JAVA_HEAP_MAX="-Xmx${YARN_RESOURCEMANAGER_HEAPSIZE}m"
+ fi
+ ;;
+ rmadmin)
+ CLASS='org.apache.hadoop.yarn.client.cli.RMAdminCLI'
+ YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
+ ;;
+ timelineserver)
+ daemon="true"
+ CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
+ YARN_OPTS="${YARN_OPTS} ${YARN_TIMELINESERVER_OPTS}"
+ if [[ -n "${YARN_TIMELINESERVER_HEAPSIZE}" ]]; then
+ JAVA_HEAP_MAX="-Xmx${YARN_TIMELINESERVER_HEAPSIZE}m"
+ fi
+ ;;
+ version)
+ CLASS=org.apache.hadoop.util.VersionInfo
+ YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
+ ;;
+ -*)
+ hadoop_exit_with_usage 1
+ ;;
+ *)
+ CLASS="${COMMAND}"
+ ;;
esac
-if [ -f "${YARN_CONF_DIR}/yarn-env.sh" ]; then
- . "${YARN_CONF_DIR}/yarn-env.sh"
+# set HADOOP_OPTS to YARN_OPTS so that we can use
+# finalize, etc, without doing anything funky
+HADOOP_OPTS="${YARN_OPTS}"
+
+daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out"
+daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}.pid"
+
+if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then
+ # shellcheck disable=SC2034
+ HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
+ YARN_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
+ HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log"
fi
-# some Java parameters
-if [ "$JAVA_HOME" != "" ]; then
- #echo "run java in $JAVA_HOME"
- JAVA_HOME=$JAVA_HOME
-fi
-
-if [ "$JAVA_HOME" = "" ]; then
- echo "Error: JAVA_HOME is not set."
- exit 1
-fi
+hadoop_add_param HADOOP_OPTS Xmx "${JAVA_HEAP_MAX}"
-JAVA=$JAVA_HOME/bin/java
-JAVA_HEAP_MAX=-Xmx1000m
+# Add YARN custom options to the command line in case someone actually
+# used these.
+#
+# Note that we are replacing ' ' with '\ ' so that when we exec
+# stuff it works
+#
+hadoop_add_param HADOOP_OPTS yarn.log.dir "-Dyarn.log.dir=${HADOOP_LOG_DIR/ /\ }"
+hadoop_add_param HADOOP_OPTS yarn.log.file "-Dyarn.log.file=${HADOOP_LOGFILE/ /\ }"
+hadoop_add_param HADOOP_OPTS yarn.home.dir "-Dyarn.home.dir=${HADOOP_YARN_HOME/ /\ }"
+hadoop_add_param HADOOP_OPTS yarn.root.logger "-Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
-# check envvars which might override default args
-if [ "$YARN_HEAPSIZE" != "" ]; then
- #echo "run with heapsize $YARN_HEAPSIZE"
- JAVA_HEAP_MAX="-Xmx""$YARN_HEAPSIZE""m"
- #echo $JAVA_HEAP_MAX
-fi
+hadoop_finalize
-# CLASSPATH initially contains $HADOOP_CONF_DIR & $YARN_CONF_DIR
-if [ ! -d "$HADOOP_CONF_DIR" ]; then
- echo No HADOOP_CONF_DIR set.
- echo Please specify it either in yarn-env.sh or in the environment.
- exit 1
-fi
+export CLASSPATH
-CLASSPATH="${HADOOP_CONF_DIR}:${YARN_CONF_DIR}:${CLASSPATH}"
-
-# for developers, add Hadoop classes to CLASSPATH
-if [ -d "$HADOOP_YARN_HOME/yarn-api/target/classes" ]; then
- CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-api/target/classes
-fi
-if [ -d "$HADOOP_YARN_HOME/yarn-common/target/classes" ]; then
- CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-common/target/classes
-fi
-if [ -d "$HADOOP_YARN_HOME/yarn-mapreduce/target/classes" ]; then
- CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-mapreduce/target/classes
-fi
-if [ -d "$HADOOP_YARN_HOME/yarn-master-worker/target/classes" ]; then
- CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-master-worker/target/classes
-fi
-if [ -d "$HADOOP_YARN_HOME/yarn-server/yarn-server-nodemanager/target/classes" ]; then
- CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-server/yarn-server-nodemanager/target/classes
-fi
-if [ -d "$HADOOP_YARN_HOME/yarn-server/yarn-server-common/target/classes" ]; then
- CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-server/yarn-server-common/target/classes
-fi
-if [ -d "$HADOOP_YARN_HOME/yarn-server/yarn-server-resourcemanager/target/classes" ]; then
- CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-server/yarn-server-resourcemanager/target/classes
-fi
-if [ -d "$HADOOP_YARN_HOME/yarn-server/yarn-server-applicationhistoryservice/target/classes" ]; then
- CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-server/yarn-server-applicationhistoryservice/target/classes
-fi
-if [ -d "$HADOOP_YARN_HOME/build/test/classes" ]; then
- CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/target/test/classes
-fi
-if [ -d "$HADOOP_YARN_HOME/build/tools" ]; then
- CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/build/tools
-fi
-
-CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/${YARN_DIR}/*
-CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/${YARN_LIB_JARS_DIR}/*
-
-# Add user defined YARN_USER_CLASSPATH to the class path (if defined)
-if [ -n "$YARN_USER_CLASSPATH" ]; then
- if [ -n "$YARN_USER_CLASSPATH_FIRST" ]; then
- # User requested to add the custom entries at the beginning
- CLASSPATH=${YARN_USER_CLASSPATH}:${CLASSPATH}
+if [[ -n "${daemon}" ]]; then
+ if [[ -n "${secure_service}" ]]; then
+ hadoop_secure_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" \
+ "${CLASS}" "${daemon_pidfile}" "${daemon_outfile}" \
+ "${priv_pidfile}" "${priv_outfile}" "${priv_errfile}" "$@"
else
- # By default we will just append the extra entries at the end
- CLASSPATH=${CLASSPATH}:${YARN_USER_CLASSPATH}
+ hadoop_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}" \
+ "${daemon_pidfile}" "${daemon_outfile}" "$@"
fi
-fi
-
-# so that filenames w/ spaces are handled correctly in loops below
-IFS=
-
-# default log directory & file
-if [ "$YARN_LOG_DIR" = "" ]; then
- YARN_LOG_DIR="$HADOOP_YARN_HOME/logs"
-fi
-if [ "$YARN_LOGFILE" = "" ]; then
- YARN_LOGFILE='yarn.log'
-fi
-
-# restore ordinary behaviour
-unset IFS
-
-# figure out which class to run
-if [ "$COMMAND" = "classpath" ] ; then
- echo $CLASSPATH
- exit
-elif [ "$COMMAND" = "rmadmin" ] ; then
- CLASS='org.apache.hadoop.yarn.client.cli.RMAdminCLI'
- YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
-elif [ "$COMMAND" = "application" ] ||
- [ "$COMMAND" = "applicationattempt" ] ||
- [ "$COMMAND" = "container" ]; then
- CLASS=org.apache.hadoop.yarn.client.cli.ApplicationCLI
- YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
- set -- $COMMAND $@
-elif [ "$COMMAND" = "node" ] ; then
- CLASS=org.apache.hadoop.yarn.client.cli.NodeCLI
- YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
-elif [ "$COMMAND" = "resourcemanager" ] ; then
- CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/rm-config/log4j.properties
- CLASS='org.apache.hadoop.yarn.server.resourcemanager.ResourceManager'
- YARN_OPTS="$YARN_OPTS $YARN_RESOURCEMANAGER_OPTS"
- if [ "$YARN_RESOURCEMANAGER_HEAPSIZE" != "" ]; then
- JAVA_HEAP_MAX="-Xmx""$YARN_RESOURCEMANAGER_HEAPSIZE""m"
- fi
-elif [ "$COMMAND" = "historyserver" ] ; then
- echo "DEPRECATED: Use of this command to start the timeline server is deprecated." 1>&2
- echo "Instead use the timelineserver command for it." 1>&2
- CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/ahs-config/log4j.properties
- CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
- YARN_OPTS="$YARN_OPTS $YARN_HISTORYSERVER_OPTS"
- if [ "$YARN_HISTORYSERVER_HEAPSIZE" != "" ]; then
- JAVA_HEAP_MAX="-Xmx""$YARN_HISTORYSERVER_HEAPSIZE""m"
- fi
-elif [ "$COMMAND" = "timelineserver" ] ; then
- CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/timelineserver-config/log4j.properties
- CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
- YARN_OPTS="$YARN_OPTS $YARN_TIMELINESERVER_OPTS"
- if [ "$YARN_TIMELINESERVER_HEAPSIZE" != "" ]; then
- JAVA_HEAP_MAX="-Xmx""$YARN_TIMELINESERVER_HEAPSIZE""m"
- fi
-elif [ "$COMMAND" = "nodemanager" ] ; then
- CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/nm-config/log4j.properties
- CLASS='org.apache.hadoop.yarn.server.nodemanager.NodeManager'
- YARN_OPTS="$YARN_OPTS -server $YARN_NODEMANAGER_OPTS"
- if [ "$YARN_NODEMANAGER_HEAPSIZE" != "" ]; then
- JAVA_HEAP_MAX="-Xmx""$YARN_NODEMANAGER_HEAPSIZE""m"
- fi
-elif [ "$COMMAND" = "proxyserver" ] ; then
- CLASS='org.apache.hadoop.yarn.server.webproxy.WebAppProxyServer'
- YARN_OPTS="$YARN_OPTS $YARN_PROXYSERVER_OPTS"
- if [ "$YARN_PROXYSERVER_HEAPSIZE" != "" ]; then
- JAVA_HEAP_MAX="-Xmx""$YARN_PROXYSERVER_HEAPSIZE""m"
- fi
-elif [ "$COMMAND" = "version" ] ; then
- CLASS=org.apache.hadoop.util.VersionInfo
- YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
-elif [ "$COMMAND" = "jar" ] ; then
- CLASS=org.apache.hadoop.util.RunJar
- YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
-elif [ "$COMMAND" = "logs" ] ; then
- CLASS=org.apache.hadoop.yarn.client.cli.LogsCLI
- YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
-elif [ "$COMMAND" = "daemonlog" ] ; then
- CLASS=org.apache.hadoop.log.LogLevel
- YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
+ exit $?
else
- CLASS=$COMMAND
+ hadoop_java_exec "${COMMAND}" "${CLASS}" "$@"
fi
-
-YARN_OPTS="$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR"
-YARN_OPTS="$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR"
-YARN_OPTS="$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE"
-YARN_OPTS="$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE"
-YARN_OPTS="$YARN_OPTS -Dyarn.home.dir=$HADOOP_YARN_HOME"
-YARN_OPTS="$YARN_OPTS -Dhadoop.home.dir=$HADOOP_YARN_HOME"
-YARN_OPTS="$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
-YARN_OPTS="$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
-if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
- YARN_OPTS="$YARN_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
-fi
-
-exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $YARN_OPTS -classpath "$CLASSPATH" $CLASS "$@"
diff --git a/hadoop-yarn-project/hadoop-yarn/bin/yarn-config.sh b/hadoop-yarn-project/hadoop-yarn/bin/yarn-config.sh
index 3d67801efe..34d2d2d0a8 100644
--- a/hadoop-yarn-project/hadoop-yarn/bin/yarn-config.sh
+++ b/hadoop-yarn-project/hadoop-yarn/bin/yarn-config.sh
@@ -13,53 +13,81 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# included in all the hadoop scripts with source command
-# should not be executable directly
-bin=`which "$0"`
-bin=`dirname "${bin}"`
-bin=`cd "$bin"; pwd`
+function hadoop_subproject_init
+{
+
+ # at some point in time, someone thought it would be a good idea to
+ # create separate vars for every subproject. *sigh*
+ # let's perform some overrides and set up some defaults for bw compat
+ # this way the common hadoop vars == subproject vars and can be
+ # used interchangeably from here on out
+ # ...
+ # this should get deprecated at some point.
+
+ if [[ -e "${YARN_CONF_DIR}/yarn-env.sh" ]]; then
+ . "${YARN_CONF_DIR}/yarn-env.sh"
+ elif [[ -e "${HADOOP_CONF_DIR}/yarn-env.sh" ]]; then
+ . "${HADOOP_CONF_DIR}/yarn-env.sh"
+ fi
+
+ if [[ -n "${YARN_CONF_DIR}" ]]; then
+ HADOOP_CONF_DIR="${YARN_CONF_DIR}"
+ fi
+
+ YARN_CONF_DIR="${HADOOP_CONF_DIR}"
+
+ # YARN_CONF_DIR needs precedence over HADOOP_CONF_DIR
+ # and the various jar dirs
+ hadoop_add_classpath "${YARN_CONF_DIR}" before
+
+ HADOOP_LOG_DIR="${YARN_LOG_DIR:-$HADOOP_LOG_DIR}"
+ YARN_LOG_DIR="${HADOOP_LOG_DIR}"
+
+ HADOOP_LOGFILE="${YARN_LOGFILE:-$HADOOP_LOGFILE}"
+ YARN_LOGFILE="${HADOOP_LOGFILE}"
+
+ HADOOP_NICENESS="${YARN_NICENESS:-$HADOOP_NICENESS}"
+ YARN_NICENESS="${HADOOP_NICENESS}"
+
+ HADOOP_STOP_TIMEOUT="${YARN_STOP_TIMEOUT:-$HADOOP_STOP_TIMEOUT}"
+ YARN_STOP_TIMEOUT="${HADOOP_STOP_TIMEOUT}"
+
+ HADOOP_PID_DIR="${YARN_PID_DIR:-$HADOOP_PID_DIR}"
+ YARN_PID_DIR="${HADOOP_PID_DIR}"
+
+ HADOOP_ROOT_LOGGER="${YARN_ROOT_LOGGER:-INFO,console}"
+ YARN_ROOT_LOGGER="${HADOOP_ROOT_LOGGER}"
+
+ HADOOP_YARN_HOME="${HADOOP_YARN_HOME:-$HADOOP_PREFIX}"
+
+ HADOOP_IDENT_STRING="${YARN_IDENT_STRING:-$HADOOP_IDENT_STRING}"
+ YARN_IDENT_STRING="${HADOOP_IDENT_STRING}"
+
+ YARN_OPTS="${YARN_OPTS:-$HADOOP_OPTS}"
+
+ # YARN-1429 added the completely superfluous YARN_USER_CLASSPATH
+ # env var. We're going to override HADOOP_USER_CLASSPATH to keep
+ # consistency with the rest of the duplicate/useless env vars
+ HADOOP_USER_CLASSPATH="${YARN_USER_CLASSPATH:-$HADOOP_USER_CLASSPATH}"
+ YARN_USER_CLASSPATH="${HADOOP_USER_CLASSPATH}"
+
+ HADOOP_USER_CLASSPATH_FIRST="${YARN_USER_CLASSPATH_FIRST:-$HADOOP_USER_CLASSPATH_FIRST}"
+ YARN_USER_CLASSPATH_FIRST="${HADOOP_USER_CLASSPATH_FIRST}"
+}
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then
- . ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh
-elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then
- . "$HADOOP_COMMON_HOME"/libexec/hadoop-config.sh
-elif [ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]; then
- . "$HADOOP_HOME"/libexec/hadoop-config.sh
+if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then
+ _yc_this="${BASH_SOURCE-$0}"
+ HADOOP_LIBEXEC_DIR=$(cd -P -- "$(dirname -- "${_yc_this}")" >/dev/null && pwd -P)
+fi
+
+if [[ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
+ . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
+elif [[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]]; then
+ . "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh"
+elif [[ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]]; then
+ . "${HADOOP_HOME}/libexec/hadoop-config.sh"
else
echo "Hadoop common not found."
exit
fi
-# Same glibc bug that discovered in Hadoop.
-# Without this you can see very large vmem settings on containers.
-export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-4}
-
-#check to see if the conf dir is given as an optional argument
-if [ $# -gt 1 ]
-then
- if [ "--config" = "$1" ]
- then
- shift
- confdir=$1
- shift
- YARN_CONF_DIR=$confdir
- fi
-fi
-
-# Allow alternate conf dir location.
-export YARN_CONF_DIR="${HADOOP_CONF_DIR:-$HADOOP_YARN_HOME/conf}"
-
-#check to see it is specified whether to use the slaves or the
-# masters file
-if [ $# -gt 1 ]
-then
- if [ "--hosts" = "$1" ]
- then
- shift
- slavesfile=$1
- shift
- export YARN_SLAVES="${YARN_CONF_DIR}/$slavesfile"
- fi
-fi
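Every variable pair in hadoop_subproject_init follows the same two-line idiom: the YARN_* value wins if the user set it, and both names are then pinned to the same value so downstream code can read either. A toy demonstration with hypothetical paths, runnable in any bash:

  YARN_LOG_DIR=/var/log/yarn       # user set the subproject var
  HADOOP_LOG_DIR=/var/log/hadoop   # generic default
  HADOOP_LOG_DIR="${YARN_LOG_DIR:-$HADOOP_LOG_DIR}"
  YARN_LOG_DIR="${HADOOP_LOG_DIR}"
  echo "${HADOOP_LOG_DIR} ${YARN_LOG_DIR}"   # prints: /var/log/yarn /var/log/yarn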
diff --git a/hadoop-yarn-project/hadoop-yarn/bin/yarn-daemon.sh b/hadoop-yarn-project/hadoop-yarn/bin/yarn-daemon.sh
index fbfa71d80d..b15448bdbc 100644
--- a/hadoop-yarn-project/hadoop-yarn/bin/yarn-daemon.sh
+++ b/hadoop-yarn-project/hadoop-yarn/bin/yarn-daemon.sh
@@ -15,147 +15,32 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+function hadoop_usage
+{
+ echo "Usage: yarn-daemon.sh [--config confdir] (start|stop|status) "
+}
-# Runs a yarn command as a daemon.
-#
-# Environment Variables
-#
-# YARN_CONF_DIR Alternate conf dir. Default is ${HADOOP_YARN_HOME}/conf.
-# YARN_LOG_DIR Where log files are stored. PWD by default.
-# YARN_MASTER host:path where hadoop code should be rsync'd from
-# YARN_PID_DIR The pid files are stored. /tmp by default.
-# YARN_IDENT_STRING A string representing this instance of hadoop. $USER by default
-# YARN_NICENESS The scheduling priority for daemons. Defaults to 0.
-##
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+ DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+ this="${BASH_SOURCE-$0}"
+ bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+ DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
-usage="Usage: yarn-daemon.sh [--config ] [--hosts hostlistfile] (start|stop) "
-
-# if no args specified, show usage
-if [ $# -le 1 ]; then
- echo $usage
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
+ . "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
+else
+ echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
exit 1
fi
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
-
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/yarn-config.sh
-
-# get arguments
-startStop=$1
-shift
-command=$1
+daemonmode=$1
shift
-hadoop_rotate_log ()
-{
- log=$1;
- num=5;
- if [ -n "$2" ]; then
- num=$2
- fi
- if [ -f "$log" ]; then # rotate logs
- while [ $num -gt 1 ]; do
- prev=`expr $num - 1`
- [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
- num=$prev
- done
- mv "$log" "$log.$num";
- fi
-}
-
-if [ -f "${YARN_CONF_DIR}/yarn-env.sh" ]; then
- . "${YARN_CONF_DIR}/yarn-env.sh"
-fi
-
-if [ "$YARN_IDENT_STRING" = "" ]; then
- export YARN_IDENT_STRING="$USER"
-fi
-
-# get log directory
-if [ "$YARN_LOG_DIR" = "" ]; then
- export YARN_LOG_DIR="$HADOOP_YARN_HOME/logs"
-fi
-
-if [ ! -w "$YARN_LOG_DIR" ] ; then
- mkdir -p "$YARN_LOG_DIR"
- chown $YARN_IDENT_STRING $YARN_LOG_DIR
-fi
-
-if [ "$YARN_PID_DIR" = "" ]; then
- YARN_PID_DIR=/tmp
-fi
-
-# some variables
-export YARN_LOGFILE=yarn-$YARN_IDENT_STRING-$command-$HOSTNAME.log
-export YARN_ROOT_LOGGER=${YARN_ROOT_LOGGER:-INFO,RFA}
-log=$YARN_LOG_DIR/yarn-$YARN_IDENT_STRING-$command-$HOSTNAME.out
-pid=$YARN_PID_DIR/yarn-$YARN_IDENT_STRING-$command.pid
-YARN_STOP_TIMEOUT=${YARN_STOP_TIMEOUT:-5}
-
-# Set default scheduling priority
-if [ "$YARN_NICENESS" = "" ]; then
- export YARN_NICENESS=0
-fi
-
-case $startStop in
-
- (start)
-
- [ -w "$YARN_PID_DIR" ] || mkdir -p "$YARN_PID_DIR"
-
- if [ -f $pid ]; then
- if kill -0 `cat $pid` > /dev/null 2>&1; then
- echo $command running as process `cat $pid`. Stop it first.
- exit 1
- fi
- fi
-
- if [ "$YARN_MASTER" != "" ]; then
- echo rsync from $YARN_MASTER
- rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $YARN_MASTER/ "$HADOOP_YARN_HOME"
- fi
-
- hadoop_rotate_log $log
- echo starting $command, logging to $log
- cd "$HADOOP_YARN_HOME"
- nohup nice -n $YARN_NICENESS "$HADOOP_YARN_HOME"/bin/yarn --config $YARN_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
- echo $! > $pid
- sleep 1
- head "$log"
- # capture the ulimit output
- echo "ulimit -a" >> $log
- ulimit -a >> $log 2>&1
- ;;
-
- (stop)
-
- if [ -f $pid ]; then
- TARGET_PID=`cat $pid`
- if kill -0 $TARGET_PID > /dev/null 2>&1; then
- echo stopping $command
- kill $TARGET_PID
- sleep $YARN_STOP_TIMEOUT
- if kill -0 $TARGET_PID > /dev/null 2>&1; then
- echo "$command did not stop gracefully after $YARN_STOP_TIMEOUT seconds: killing with kill -9"
- kill -9 $TARGET_PID
- fi
- else
- echo no $command to stop
- fi
- rm -f $pid
- else
- echo no $command to stop
- fi
- ;;
-
- (*)
- echo $usage
- exit 1
- ;;
-
-esac
-
-
+exec "${HADOOP_YARN_HOME}/bin/yarn" \
+--config "${HADOOP_CONF_DIR}" --daemon "${daemonmode}" "$@"
diff --git a/hadoop-yarn-project/hadoop-yarn/bin/yarn-daemons.sh b/hadoop-yarn-project/hadoop-yarn/bin/yarn-daemons.sh
index a7858e4969..1413a3dfac 100644
--- a/hadoop-yarn-project/hadoop-yarn/bin/yarn-daemons.sh
+++ b/hadoop-yarn-project/hadoop-yarn/bin/yarn-daemons.sh
@@ -16,23 +16,31 @@
# limitations under the License.
-# Run a Yarn command on all slave hosts.
+function hadoop_usage
+{
+ echo "Usage: yarn-daemons.sh [--config confdir] [--hosts hostlistfile] (start|stop|status) "
+}
-usage="Usage: yarn-daemons.sh [--config confdir] [--hosts hostlistfile] [start
-|stop] command args..."
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
-# if no args specified, show usage
-if [ $# -le 1 ]; then
- echo $usage
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+ DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+ DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
+ . "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
+else
+ echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
exit 1
fi
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
-
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/yarn-config.sh
-
-exec "$bin/slaves.sh" --config $YARN_CONF_DIR cd "$HADOOP_YARN_HOME" \; "$bin/yarn-daemon.sh" --config $YARN_CONF_DIR "$@"
+hadoop_connect_to_hosts "${bin}/yarn-daemon.sh" \
+--config "${HADOOP_CONF_DIR}" "$@"
diff --git a/hadoop-yarn-project/hadoop-yarn/conf/yarn-env.sh b/hadoop-yarn-project/hadoop-yarn/conf/yarn-env.sh
index bced9b155d..755cfd88d8 100644
--- a/hadoop-yarn-project/hadoop-yarn/conf/yarn-env.sh
+++ b/hadoop-yarn-project/hadoop-yarn/conf/yarn-env.sh
@@ -13,118 +13,115 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+##
+## THIS FILE ACTS AS AN OVERRIDE FOR hadoop-env.sh FOR ALL
+## WORK DONE BY THE yarn AND RELATED COMMANDS.
+##
+## Precedence rules:
+##
+## yarn-env.sh > hadoop-env.sh > hard-coded defaults
+##
+## YARN_xyz > HADOOP_xyz > hard-coded defaults
+##
+
+###
+# Generic settings for YARN
+###
+
# User for YARN daemons
export HADOOP_YARN_USER=${HADOOP_YARN_USER:-yarn}
-# resolve links - $0 may be a softlink
-export YARN_CONF_DIR="${YARN_CONF_DIR:-$HADOOP_YARN_HOME/conf}"
+#
+# By default, YARN will use HADOOP_CONF_DIR. Specify a custom
+# YARN_CONF_DIR here
+# export YARN_CONF_DIR="${YARN_CONF_DIR:-$HADOOP_YARN_HOME/conf}"
+#
-# some Java parameters
-# export JAVA_HOME=/home/y/libexec/jdk1.6.0/
-if [ "$JAVA_HOME" != "" ]; then
- #echo "run java in $JAVA_HOME"
- JAVA_HOME=$JAVA_HOME
-fi
-
-if [ "$JAVA_HOME" = "" ]; then
- echo "Error: JAVA_HOME is not set."
- exit 1
-fi
+# Override Hadoop's log directory & file
+# export YARN_LOG_DIR="$HADOOP_YARN_HOME/logs"
+# export YARN_LOGFILE='yarn.log'
-JAVA=$JAVA_HOME/bin/java
-JAVA_HEAP_MAX=-Xmx1000m
+# Need a custom-to-YARN service-level authorization policy file?
+# export YARN_POLICYFILE="yarn-policy.xml"
-# For setting YARN specific HEAP sizes please use this
-# Parameter and set appropriately
-# YARN_HEAPSIZE=1000
-
-# check envvars which might override default args
-if [ "$YARN_HEAPSIZE" != "" ]; then
- JAVA_HEAP_MAX="-Xmx""$YARN_HEAPSIZE""m"
-fi
+# Override the log4j settings for all YARN apps
+# export YARN_ROOT_LOGGER="INFO,console"
+###
# Resource Manager specific parameters
+###
-# Specify the max Heapsize for the ResourceManager using a numerical value
+# Specify the max heapsize for the ResourceManager using a numerical value
# in the scale of MB. For example, to specify a JVM option of -Xmx1000m, set
# the value to 1000.
-# This value will be overridden by an Xmx setting specified in either YARN_OPTS
-# and/or YARN_RESOURCEMANAGER_OPTS.
+# This value will be overridden by an Xmx setting specified in either YARN_OPTS,
+# HADOOP_OPTS, and/or YARN_RESOURCEMANAGER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
+#
#export YARN_RESOURCEMANAGER_HEAPSIZE=1000
-# Specify the max Heapsize for the timeline server using a numerical value
-# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set
-# the value to 1000.
-# This value will be overridden by an Xmx setting specified in either YARN_OPTS
-# and/or YARN_TIMELINESERVER_OPTS.
-# If not specified, the default value will be picked from either YARN_HEAPMAX
-# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
-#export YARN_TIMELINESERVER_HEAPSIZE=1000
-
# Specify the JVM options to be used when starting the ResourceManager.
# These options will be appended to the options specified as YARN_OPTS
# and therefore may override any similar flags set in YARN_OPTS
-#export YARN_RESOURCEMANAGER_OPTS=
+#
+# Examples for a Sun/Oracle JDK:
+# a) override the appsummary log file:
+# export YARN_RESOURCEMANAGER_OPTS="-Dyarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log -Dyarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY"
+#
+# b) Set JMX options
+# export YARN_RESOURCEMANAGER_OPTS="-Dcom.sun.management.jmxremote=true -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=1026"
+#
+# c) Set garbage collection logs from hadoop-env.sh
+# export YARN_RESOURCEMANAGER_OPTS="${HADOOP_GC_SETTINGS} -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"
+#
+# d) ... or set them directly
+# export YARN_RESOURCEMANAGER_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"
+#
+#
+# export YARN_RESOURCEMANAGER_OPTS=
+###
# Node Manager specific parameters
+###
# Specify the max Heapsize for the NodeManager using a numerical value
# in the scale of MB. For example, to specify a JVM option of -Xmx1000m, set
# the value to 1000.
-# This value will be overridden by an Xmx setting specified in either YARN_OPTS
-# and/or YARN_NODEMANAGER_OPTS.
+# This value will be overridden by an Xmx setting specified in either YARN_OPTS,
+# HADOOP_OPTS, and/or YARN_NODEMANAGER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
+#
#export YARN_NODEMANAGER_HEAPSIZE=1000
# Specify the JVM options to be used when starting the NodeManager.
# These options will be appended to the options specified as YARN_OPTS
# and therefore may override any similar flags set in YARN_OPTS
+#
+# See ResourceManager for some examples
+#
#export YARN_NODEMANAGER_OPTS=
-# so that filenames w/ spaces are handled correctly in loops below
-IFS=
+###
+# TimeLineServer specific parameters
+###
+# Specify the max heapsize for the timeline server using a numerical value
+# in the scale of MB. For example, to specify a JVM option of -Xmx1000m, set
+# the value to 1000.
+# This value will be overridden by an Xmx setting specified in either YARN_OPTS,
+# HADOOP_OPTS, and/or YARN_TIMELINESERVER_OPTS.
+# If not specified, the default value will be picked from either YARN_HEAPMAX
+# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
+#
+#export YARN_TIMELINESERVER_HEAPSIZE=1000
-# default log directory & file
-if [ "$YARN_LOG_DIR" = "" ]; then
- YARN_LOG_DIR="$HADOOP_YARN_HOME/logs"
-fi
-if [ "$YARN_LOGFILE" = "" ]; then
- YARN_LOGFILE='yarn.log'
-fi
-
-# default policy file for service-level authorization
-if [ "$YARN_POLICYFILE" = "" ]; then
- YARN_POLICYFILE="hadoop-policy.xml"
-fi
-
-# restore ordinary behaviour
-unset IFS
-
-MAC_OSX=false
-case "`uname`" in
-Darwin*) MAC_OSX=true;;
-esac
-
-if $MAC_OSX; then
- YARN_OPTS="$YARN_OPTS -Djava.security.krb5.realm= -Djava.security.krb5.kdc="
-fi
-
-
-YARN_OPTS="$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR"
-YARN_OPTS="$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR"
-YARN_OPTS="$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE"
-YARN_OPTS="$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE"
-YARN_OPTS="$YARN_OPTS -Dyarn.home.dir=$YARN_COMMON_HOME"
-YARN_OPTS="$YARN_OPTS -Dyarn.id.str=$YARN_IDENT_STRING"
-YARN_OPTS="$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
-YARN_OPTS="$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
-if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
- YARN_OPTS="$YARN_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
-fi
-YARN_OPTS="$YARN_OPTS -Dyarn.policy.file=$YARN_POLICYFILE"
-
+# Specify the JVM options to be used when starting the TimeLineServer.
+# These options will be appended to the options specified as YARN_OPTS
+# and therefore may override any similar flags set in YARN_OPTS
+#
+# See ResourceManager for some examples
+#
+#export YARN_TIMELINESERVER_OPTS=
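The precedence rules at the top of the new yarn-env.sh mean a YARN_* setting narrows the matching HADOOP_* setting for yarn commands only. For instance, with these two files in HADOOP_CONF_DIR (values are hypothetical):

  # hadoop-env.sh -- applies to every Hadoop command
  export HADOOP_LOG_DIR=/var/log/hadoop

  # yarn-env.sh -- sourced afterwards, wins for yarn commands only
  export YARN_LOG_DIR=/var/log/yarn

hdfs daemons keep logging under /var/log/hadoop while yarn daemons log under /var/log/yarn, with hard-coded defaults used only when neither file sets a value.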