Merge from trunk to branch.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/fs-encryption@1619018 13f79535-47bb-0310-9956-ffa450edef68
Andrew Wang 2014-08-20 01:22:42 +00:00
commit d2a39b61aa
61 changed files with 3614 additions and 2034 deletions

View File

@@ -29,6 +29,7 @@
<exclude>*-config.cmd</exclude> <exclude>*-config.cmd</exclude>
<exclude>start-*.cmd</exclude> <exclude>start-*.cmd</exclude>
<exclude>stop-*.cmd</exclude> <exclude>stop-*.cmd</exclude>
<exclude>hadoop-layout.sh.example</exclude>
</excludes> </excludes>
<fileMode>0755</fileMode> <fileMode>0755</fileMode>
</fileSet> </fileSet>
@@ -42,6 +43,8 @@
<includes> <includes>
<include>*-config.sh</include> <include>*-config.sh</include>
<include>*-config.cmd</include> <include>*-config.cmd</include>
<include>*-functions.sh</include>
<include>hadoop-layout.sh.example</include>
</includes> </includes>
<fileMode>0755</fileMode> <fileMode>0755</fileMode>
</fileSet> </fileSet>
@@ -57,6 +60,10 @@
<exclude>hadoop.cmd</exclude> <exclude>hadoop.cmd</exclude>
<exclude>hdfs.cmd</exclude> <exclude>hdfs.cmd</exclude>
<exclude>hadoop-config.cmd</exclude> <exclude>hadoop-config.cmd</exclude>
<exclude>hadoop-functions.sh</exclude>
<exclude>hadoop-layout.sh.example</exclude>
<exclude>hdfs-config.cmd</exclude>
<exclude>hdfs-config.sh</exclude>
</excludes> </excludes>
<fileMode>0755</fileMode> <fileMode>0755</fileMode>
</fileSet> </fileSet>

View File

@@ -9,6 +9,8 @@ Trunk (Unreleased)
HADOOP-10474 Move o.a.h.record to hadoop-streaming. (wheat9) HADOOP-10474 Move o.a.h.record to hadoop-streaming. (wheat9)
HADOOP-9902. Shell script rewrite (aw)
NEW FEATURES NEW FEATURES
HADOOP-10433. Key Management Server based on KeyProvider API. (tucu) HADOOP-10433. Key Management Server based on KeyProvider API. (tucu)
@@ -615,6 +617,15 @@ Release 2.6.0 - UNRELEASED
HADOOP-10973. Native Libraries Guide contains format error. (Peter Klavins HADOOP-10973. Native Libraries Guide contains format error. (Peter Klavins
via Arpit Agarwal) via Arpit Agarwal)
HADOOP-10972. Native Libraries Guide contains mis-spelt build line (Peter
Klavins via aw)
HADOOP-10873. Fix dead link in Configuration javadoc (Akira AJISAKA
via aw)
HADOOP-10968. hadoop native build fails to detect java_libarch on
ppc64le (Dinar Valeev via Colin Patrick McCabe)
Release 2.5.0 - UNRELEASED Release 2.5.0 - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@@ -78,6 +78,12 @@ IF("${CMAKE_SYSTEM}" MATCHES "Linux")
SET(_java_libarch "amd64") SET(_java_libarch "amd64")
ELSEIF (CMAKE_SYSTEM_PROCESSOR MATCHES "^arm") ELSEIF (CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
SET(_java_libarch "arm") SET(_java_libarch "arm")
ELSEIF (CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64le")
IF(EXISTS "${_JAVA_HOME}/jre/lib/ppc64le")
SET(_java_libarch "ppc64le")
ELSE()
SET(_java_libarch "ppc64")
ENDIF()
ELSE() ELSE()
SET(_java_libarch ${CMAKE_SYSTEM_PROCESSOR}) SET(_java_libarch ${CMAKE_SYSTEM_PROCESSOR})
ENDIF() ENDIF()
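
As an illustrative aside, the same libarch probe can be reproduced from a shell prompt when checking whether a ppc64le JDK ships the new directory layout; this is only a sketch mirroring the CMake logic above, and the JAVA_HOME default below is an assumption, not something this change sets.

# Sketch only: mirrors the CMake ppc64le/ppc64 fallback above.
# The JAVA_HOME default is a hypothetical path used for illustration.
JAVA_HOME="${JAVA_HOME:-/usr/lib/jvm/default-java}"
if [[ -d "${JAVA_HOME}/jre/lib/ppc64le" ]]; then
  echo "JNI libarch: ppc64le"
else
  echo "JNI libarch: ppc64 (fallback)"
fi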

View File

@@ -15,130 +15,164 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# This script runs the hadoop core commands. function hadoop_usage()
{
bin=`which $0`
bin=`dirname ${bin}`
bin=`cd "$bin" > /dev/null; pwd`
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
function print_usage(){
echo "Usage: hadoop [--config confdir] COMMAND" echo "Usage: hadoop [--config confdir] COMMAND"
echo " where COMMAND is one of:" echo " where COMMAND is one of:"
echo " fs run a generic filesystem user client" echo " archive -archiveName NAME -p <parent path> <src>* <dest>"
echo " version print the version" echo " create a Hadoop archive"
echo " jar <jar> run a jar file" echo " checknative [-a|-h] check native Hadoop and compression "
echo " checknative [-a|-h] check native hadoop and compression libraries availability" echo " libraries availability"
echo " distcp <srcurl> <desturl> copy file or directories recursively"
echo " archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive"
echo " classpath prints the class path needed to get the" echo " classpath prints the class path needed to get the"
echo " Hadoop jar and the required libraries"
echo " credential interact with credential providers" echo " credential interact with credential providers"
echo " Hadoop jar and the required libraries"
echo " daemonlog get/set the log level for each daemon" echo " daemonlog get/set the log level for each daemon"
echo " distch path:owner:group:permisson"
echo " distributed metadata changer"
echo " distcp <srcurl> <desturl> "
echo " copy file or directories recursively"
echo " fs run a generic filesystem user client"
echo " jar <jar> run a jar file"
echo " jnipath prints the java.library.path"
echo " key manage keys via the KeyProvider"
echo " version print the version"
echo " or" echo " or"
echo " CLASSNAME run the class named CLASSNAME" echo " CLASSNAME run the class named CLASSNAME"
echo "" echo ""
echo "Most commands print help when invoked w/o parameters." echo "Most commands print help when invoked w/o parameters."
} }
# This script runs the hadoop core commands.
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
exit 1
fi
if [ $# = 0 ]; then if [ $# = 0 ]; then
print_usage hadoop_exit_with_usage 1
exit
fi fi
COMMAND=$1 COMMAND=$1
case $COMMAND in shift
# usage flags
--help|-help|-h)
print_usage
exit
;;
#hdfs commands case ${COMMAND} in
namenode|secondarynamenode|datanode|dfs|dfsadmin|fsck|balancer|fetchdt|oiv|dfsgroups|portmap|nfs3) balancer|datanode|dfs|dfsadmin|dfsgroups| \
echo "DEPRECATED: Use of this script to execute hdfs command is deprecated." 1>&2 namenode|secondarynamenode|fsck|fetchdt|oiv| \
echo "Instead use the hdfs command for it." 1>&2 portmap|nfs3)
echo "" 1>&2 hadoop_error "WARNING: Use of this script to execute ${COMMAND} is deprecated."
#try to locate hdfs and if present, delegate to it. COMMAND=${COMMAND/dfsgroups/groups}
shift hadoop_error "WARNING: Attempting to execute replacement \"hdfs ${COMMAND}\" instead."
if [ -f "${HADOOP_HDFS_HOME}"/bin/hdfs ]; then hadoop_error ""
exec "${HADOOP_HDFS_HOME}"/bin/hdfs ${COMMAND/dfsgroups/groups} "$@" #try to locate hdfs and if present, delegate to it.
elif [ -f "${HADOOP_PREFIX}"/bin/hdfs ]; then if [[ -f "${HADOOP_HDFS_HOME}/bin/hdfs" ]]; then
exec "${HADOOP_PREFIX}"/bin/hdfs ${COMMAND/dfsgroups/groups} "$@" # shellcheck disable=SC2086
exec "${HADOOP_HDFS_HOME}/bin/hdfs" \
--config "${HADOOP_CONF_DIR}" "${COMMAND}" "$@"
elif [[ -f "${HADOOP_PREFIX}/bin/hdfs" ]]; then
# shellcheck disable=SC2086
exec "${HADOOP_PREFIX}/bin/hdfs" \
--config "${HADOOP_CONF_DIR}" "${COMMAND}" "$@"
else else
echo "HADOOP_HDFS_HOME not found!" hadoop_error "HADOOP_HDFS_HOME not found!"
exit 1 exit 1
fi fi
;; ;;
#mapred commands for backwards compatibility #mapred commands for backwards compatibility
pipes|job|queue|mrgroups|mradmin|jobtracker|tasktracker) pipes|job|queue|mrgroups|mradmin|jobtracker|tasktracker)
echo "DEPRECATED: Use of this script to execute mapred command is deprecated." 1>&2 hadoop_error "WARNING: Use of this script to execute ${COMMAND} is deprecated."
echo "Instead use the mapred command for it." 1>&2 COMMAND=${COMMAND/mrgroups/groups}
echo "" 1>&2 hadoop_error "WARNING: Attempting to execute replacement \"mapred ${COMMAND}\" instead."
hadoop_error ""
#try to locate mapred and if present, delegate to it. #try to locate mapred and if present, delegate to it.
shift if [[ -f "${HADOOP_MAPRED_HOME}/bin/mapred" ]]; then
if [ -f "${HADOOP_MAPRED_HOME}"/bin/mapred ]; then exec "${HADOOP_MAPRED_HOME}/bin/mapred" \
exec "${HADOOP_MAPRED_HOME}"/bin/mapred ${COMMAND/mrgroups/groups} "$@" --config "${HADOOP_CONF_DIR}" "${COMMAND}" "$@"
elif [ -f "${HADOOP_PREFIX}"/bin/mapred ]; then elif [[ -f "${HADOOP_PREFIX}/bin/mapred" ]]; then
exec "${HADOOP_PREFIX}"/bin/mapred ${COMMAND/mrgroups/groups} "$@" exec "${HADOOP_PREFIX}/bin/mapred" \
--config "${HADOOP_CONF_DIR}" "${COMMAND}" "$@"
else else
echo "HADOOP_MAPRED_HOME not found!" hadoop_error "HADOOP_MAPRED_HOME not found!"
exit 1 exit 1
fi fi
;; ;;
archive)
#core commands CLASS=org.apache.hadoop.tools.HadoopArchives
*) hadoop_add_classpath "${TOOL_PATH}"
# the core commands ;;
if [ "$COMMAND" = "fs" ] ; then checknative)
CLASS=org.apache.hadoop.fs.FsShell CLASS=org.apache.hadoop.util.NativeLibraryChecker
elif [ "$COMMAND" = "version" ] ; then ;;
CLASS=org.apache.hadoop.util.VersionInfo classpath)
elif [ "$COMMAND" = "jar" ] ; then if [[ "$#" -eq 1 ]]; then
CLASS=org.apache.hadoop.util.RunJar CLASS=org.apache.hadoop.util.Classpath
elif [ "$COMMAND" = "key" ] ; then else
CLASS=org.apache.hadoop.crypto.key.KeyShell hadoop_finalize
elif [ "$COMMAND" = "checknative" ] ; then echo "${CLASSPATH}"
CLASS=org.apache.hadoop.util.NativeLibraryChecker exit 0
elif [ "$COMMAND" = "distcp" ] ; then
CLASS=org.apache.hadoop.tools.DistCp
CLASSPATH=${CLASSPATH}:${TOOL_PATH}
elif [ "$COMMAND" = "daemonlog" ] ; then
CLASS=org.apache.hadoop.log.LogLevel
elif [ "$COMMAND" = "archive" ] ; then
CLASS=org.apache.hadoop.tools.HadoopArchives
CLASSPATH=${CLASSPATH}:${TOOL_PATH}
elif [ "$COMMAND" = "credential" ] ; then
CLASS=org.apache.hadoop.security.alias.CredentialShell
elif [ "$COMMAND" = "classpath" ] ; then
if [ "$#" -eq 1 ]; then
# No need to bother starting up a JVM for this simple case.
echo $CLASSPATH
exit
else
CLASS=org.apache.hadoop.util.Classpath
fi
elif [[ "$COMMAND" = -* ]] ; then
# class and package names cannot begin with a -
echo "Error: No command named \`$COMMAND' was found. Perhaps you meant \`hadoop ${COMMAND#-}'"
exit 1
else
CLASS=$COMMAND
fi fi
shift ;;
credential)
# Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS CLASS=org.apache.hadoop.security.alias.CredentialShell
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" ;;
daemonlog)
#make sure security appender is turned off CLASS=org.apache.hadoop.log.LogLevel
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}" ;;
distch)
export CLASSPATH=$CLASSPATH CLASS=org.apache.hadoop.tools.DistCh
exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@" hadoop_add_classpath "${TOOL_PATH}"
;; ;;
distcp)
CLASS=org.apache.hadoop.tools.DistCp
hadoop_add_classpath "${TOOL_PATH}"
;;
fs)
CLASS=org.apache.hadoop.fs.FsShell
;;
jar)
CLASS=org.apache.hadoop.util.RunJar
;;
jnipath)
hadoop_finalize
echo "${JAVA_LIBRARY_PATH}"
exit 0
;;
key)
CLASS=org.apache.hadoop.crypto.key.KeyShell
;;
version)
CLASS=org.apache.hadoop.util.VersionInfo
;;
-*|hdfs)
hadoop_exit_with_usage 1
;;
*)
CLASS="${COMMAND}"
;;
esac esac
# Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
hadoop_add_param HADOOP_OPTS Xmx "${JAVA_HEAP_MAX}"
hadoop_finalize
export CLASSPATH
hadoop_java_exec "${COMMAND}" "${CLASS}" "$@"
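
For readers skimming the rewrite, the deprecation shim above boils down to a small, self-contained pattern. The sketch below assumes hadoop_error simply writes its arguments to stderr (its real definition lives in the suppressed hadoop-functions.sh diff) and uses a hypothetical HADOOP_HDFS_HOME default; it is not the committed script itself.

#!/usr/bin/env bash
# Sketch of the delegation performed for deprecated hdfs subcommands.
hadoop_error() { echo "$*" 1>&2; }   # assumed behaviour of the real helper

COMMAND="dfsgroups"                  # hypothetical deprecated subcommand
COMMAND=${COMMAND/dfsgroups/groups}
hadoop_error "WARNING: Use of this script to execute ${COMMAND} is deprecated."
hadoop_error "WARNING: Attempting to execute replacement \"hdfs ${COMMAND}\" instead."
exec "${HADOOP_HDFS_HOME:-/opt/hadoop}/bin/hdfs" "${COMMAND}" "$@"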

View File

@@ -1,3 +1,5 @@
#
#
# Licensed to the Apache Software Foundation (ASF) under one or more # Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with # contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership. # this work for additional information regarding copyright ownership.
@@ -13,280 +15,176 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
####
# IMPORTANT
####
## The hadoop-config.sh tends to get executed by non-Hadoop scripts.
## Those parts expect this script to parse/manipulate $@. In order
## to maintain backward compatibility, this means a surprising
## lack of functions for bits that would be much better off in
## a function.
##
## In other words, yes, there is some bad things happen here and
## unless we break the rest of the ecosystem, we can't change it. :(
# included in all the hadoop scripts with source command # included in all the hadoop scripts with source command
# should not be executable directly # should not be executable directly
# also should not be passed any arguments, since we need original $* # also should not be passed any arguments, since we need original $*
# Resolve links ($0 may be a softlink) and convert a relative path
# to an absolute path. NB: The -P option requires bash built-ins
# or POSIX:2001 compliant cd and pwd.
# HADOOP_CLASSPATH Extra Java CLASSPATH entries.
#
# HADOOP_USER_CLASSPATH_FIRST When defined, the HADOOP_CLASSPATH is
# added in the beginning of the global
# classpath. Can be defined, for example,
# by doing
# export HADOOP_USER_CLASSPATH_FIRST=true
# #
# after doing more config, caller should also exec finalize
# function to finish last minute/default configs for
# settings that might be different between daemons & interactive
this="${BASH_SOURCE-$0}" # you must be this high to ride the ride
common_bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P) if [[ -z "${BASH_VERSINFO}" ]] || [[ "${BASH_VERSINFO}" -lt 3 ]]; then
script="$(basename -- "$this")" echo "Hadoop requires bash v3 or better. Sorry."
this="$common_bin/$script" exit 1
[ -f "$common_bin/hadoop-layout.sh" ] && . "$common_bin/hadoop-layout.sh"
HADOOP_COMMON_DIR=${HADOOP_COMMON_DIR:-"share/hadoop/common"}
HADOOP_COMMON_LIB_JARS_DIR=${HADOOP_COMMON_LIB_JARS_DIR:-"share/hadoop/common/lib"}
HADOOP_COMMON_LIB_NATIVE_DIR=${HADOOP_COMMON_LIB_NATIVE_DIR:-"lib/native"}
HDFS_DIR=${HDFS_DIR:-"share/hadoop/hdfs"}
HDFS_LIB_JARS_DIR=${HDFS_LIB_JARS_DIR:-"share/hadoop/hdfs/lib"}
YARN_DIR=${YARN_DIR:-"share/hadoop/yarn"}
YARN_LIB_JARS_DIR=${YARN_LIB_JARS_DIR:-"share/hadoop/yarn/lib"}
MAPRED_DIR=${MAPRED_DIR:-"share/hadoop/mapreduce"}
MAPRED_LIB_JARS_DIR=${MAPRED_LIB_JARS_DIR:-"share/hadoop/mapreduce/lib"}
# the root of the Hadoop installation
# See HADOOP-6255 for directory structure layout
HADOOP_DEFAULT_PREFIX=$(cd -P -- "$common_bin"/.. && pwd -P)
HADOOP_PREFIX=${HADOOP_PREFIX:-$HADOOP_DEFAULT_PREFIX}
export HADOOP_PREFIX
#check to see if the conf dir is given as an optional argument
if [ $# -gt 1 ]
then
if [ "--config" = "$1" ]
then
shift
confdir=$1
if [ ! -d "$confdir" ]; then
echo "Error: Cannot find configuration directory: $confdir"
exit 1
fi
shift
HADOOP_CONF_DIR=$confdir
fi
fi fi
# Allow alternate conf dir location. # In order to get partially bootstrapped, we need to figure out where
if [ -e "${HADOOP_PREFIX}/conf/hadoop-env.sh" ]; then # we are located. Chances are good that our caller has already done
DEFAULT_CONF_DIR="conf" # this work for us, but just in case...
if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then
_hadoop_common_this="${BASH_SOURCE-$0}"
HADOOP_LIBEXEC_DIR=$(cd -P -- "$(dirname -- "${_hadoop_common_this}")" >/dev/null && pwd -P)
fi
# get our functions defined for usage later
if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-functions.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hadoop-functions.sh"
else else
DEFAULT_CONF_DIR="etc/hadoop" echo "ERROR: Unable to exec ${HADOOP_LIBEXEC_DIR}/hadoop-functions.sh." 1>&2
fi
export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-$HADOOP_PREFIX/$DEFAULT_CONF_DIR}"
if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
. "${HADOOP_CONF_DIR}/hadoop-env.sh"
fi
# User can specify hostnames or a file where the hostnames are (not both)
if [[ ( "$HADOOP_SLAVES" != '' ) && ( "$HADOOP_SLAVE_NAMES" != '' ) ]] ; then
echo \
"Error: Please specify one variable HADOOP_SLAVES or " \
"HADOOP_SLAVE_NAME and not both."
exit 1 exit 1
fi fi
# Process command line options that specify hosts or file with host # allow overrides of the above and pre-defines of the below
# list if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-layout.sh" ]]; then
if [ $# -gt 1 ] . "${HADOOP_LIBEXEC_DIR}/hadoop-layout.sh"
then
if [ "--hosts" = "$1" ]
then
shift
export HADOOP_SLAVES="${HADOOP_CONF_DIR}/$1"
shift
elif [ "--hostnames" = "$1" ]
then
shift
export HADOOP_SLAVE_NAMES=$1
shift
fi
fi fi
# User can specify hostnames or a file where the hostnames are (not both) #
# (same check as above but now we know it's command line options that cause # IMPORTANT! We are not executing user provided code yet!
# the problem) #
if [[ ( "$HADOOP_SLAVES" != '' ) && ( "$HADOOP_SLAVE_NAMES" != '' ) ]] ; then
echo \ # Let's go! Base definitions so we can move forward
"Error: Please specify one of --hosts or --hostnames options and not both." hadoop_bootstrap_init
exit 1
# let's find our conf.
#
# first, check and process params passed to us
# we process this in-line so that we can directly modify $@
# if something downstream is processing that directly,
# we need to make sure our params have been ripped out
# note that we do many of them here for various utilities.
# this provides consistency and forces a more consistent
# user experience
# save these off in case our caller needs them
# shellcheck disable=SC2034
HADOOP_USER_PARAMS="$@"
HADOOP_DAEMON_MODE="default"
while [[ -z "${_hadoop_common_done}" ]]; do
case $1 in
--buildpaths)
# shellcheck disable=SC2034
HADOOP_ENABLE_BUILD_PATHS=true
shift
;;
--config)
shift
confdir=$1
shift
if [[ -d "${confdir}" ]]; then
# shellcheck disable=SC2034
YARN_CONF_DIR="${confdir}"
# shellcheck disable=SC2034
HADOOP_CONF_DIR="${confdir}"
elif [[ -z "${confdir}" ]]; then
hadoop_error "ERROR: No parameter provided for --config "
hadoop_exit_with_usage 1
else
hadoop_error "ERROR: Cannot find configuration directory \"${confdir}\""
hadoop_exit_with_usage 1
fi
;;
--daemon)
shift
HADOOP_DAEMON_MODE=$1
shift
if [[ -z "${HADOOP_DAEMON_MODE}" || \
! "${HADOOP_DAEMON_MODE}" =~ ^st(art|op|atus)$ ]]; then
hadoop_error "ERROR: --daemon must be followed by either \"start\", \"stop\", or \"status\"."
hadoop_exit_with_usage 1
fi
;;
--help|-help|-h|help|--h|--\?|-\?|\?)
hadoop_exit_with_usage 0
;;
--hostnames)
shift
# shellcheck disable=SC2034
HADOOP_SLAVE_NAMES="$1"
shift
;;
--hosts)
shift
hadoop_populate_slaves_file "$1"
shift
;;
*)
_hadoop_common_done=true
;;
esac
done
hadoop_find_confdir
hadoop_exec_hadoopenv
#
# IMPORTANT! User provided code is now available!
#
# do all the OS-specific startup bits here
# this allows us to get a decent JAVA_HOME,
# call crle for LD_LIBRARY_PATH, etc.
hadoop_os_tricks
hadoop_java_setup
hadoop_basic_init
# inject any sub-project overrides, defaults, etc.
if declare -F hadoop_subproject_init >/dev/null ; then
hadoop_subproject_init
fi fi
# check if net.ipv6.bindv6only is set to 1 # get the native libs in there pretty quick
bindv6only=$(/sbin/sysctl -n net.ipv6.bindv6only 2> /dev/null) hadoop_add_javalibpath "${HADOOP_PREFIX}/build/native"
if [ -n "$bindv6only" ] && [ "$bindv6only" -eq "1" ] && [ "$HADOOP_ALLOW_IPV6" != "yes" ] hadoop_add_javalibpath "${HADOOP_PREFIX}/${HADOOP_COMMON_LIB_NATIVE_DIR}"
then
echo "Error: \"net.ipv6.bindv6only\" is set to 1 - Java networking could be broken" # get the basic java class path for these subprojects
echo "For more info: http://wiki.apache.org/hadoop/HadoopIPv6" # in as quickly as possible since other stuff
exit 1 # will definitely depend upon it.
fi #
# at some point, this will get replaced with something pluggable
# Newer versions of glibc use an arena memory allocator that causes virtual # so that these functions can sit in their projects rather than
# memory usage to explode. This interacts badly with the many threads that # common
# we use in Hadoop. Tune the variable down to prevent vmem explosion. #
export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-4} for i in common hdfs yarn mapred
do
# Attempt to set JAVA_HOME if it is not set hadoop_add_to_classpath_$i
if [[ -z $JAVA_HOME ]]; then done
# On OSX use java_home (or /Library for older versions)
if [ "Darwin" == "$(uname -s)" ]; then #
if [ -x /usr/libexec/java_home ]; then # backwards compatibility. new stuff should
export JAVA_HOME=($(/usr/libexec/java_home)) # call this when they are ready
else #
export JAVA_HOME=(/Library/Java/Home) if [[ -z "${HADOOP_NEW_CONFIG}" ]]; then
fi hadoop_finalize
fi
# Bail if we did not detect it
if [[ -z $JAVA_HOME ]]; then
echo "Error: JAVA_HOME is not set and could not be found." 1>&2
exit 1
fi
fi
JAVA=$JAVA_HOME/bin/java
# check envvars which might override default args
if [ "$HADOOP_HEAPSIZE" != "" ]; then
#echo "run with heapsize $HADOOP_HEAPSIZE"
JAVA_HEAP_MAX="-Xmx""$HADOOP_HEAPSIZE""m"
#echo $JAVA_HEAP_MAX
fi
# CLASSPATH initially contains $HADOOP_CONF_DIR
CLASSPATH="${HADOOP_CONF_DIR}"
# so that filenames w/ spaces are handled correctly in loops below
IFS=
if [ "$HADOOP_COMMON_HOME" = "" ]; then
if [ -d "${HADOOP_PREFIX}/$HADOOP_COMMON_DIR" ]; then
export HADOOP_COMMON_HOME=$HADOOP_PREFIX
fi
fi
# for releases, add core hadoop jar & webapps to CLASSPATH
if [ -d "$HADOOP_COMMON_HOME/$HADOOP_COMMON_DIR/webapps" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/$HADOOP_COMMON_DIR
fi
if [ -d "$HADOOP_COMMON_HOME/$HADOOP_COMMON_LIB_JARS_DIR" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/$HADOOP_COMMON_LIB_JARS_DIR'/*'
fi
CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/$HADOOP_COMMON_DIR'/*'
# default log directory & file
if [ "$HADOOP_LOG_DIR" = "" ]; then
HADOOP_LOG_DIR="$HADOOP_PREFIX/logs"
fi
if [ "$HADOOP_LOGFILE" = "" ]; then
HADOOP_LOGFILE='hadoop.log'
fi
# default policy file for service-level authorization
if [ "$HADOOP_POLICYFILE" = "" ]; then
HADOOP_POLICYFILE="hadoop-policy.xml"
fi
# restore ordinary behaviour
unset IFS
# setup 'java.library.path' for native-hadoop code if necessary
if [ -d "${HADOOP_PREFIX}/build/native" -o -d "${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR" ]; then
if [ -d "${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR" ]; then
if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR
else
JAVA_LIBRARY_PATH=${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR
fi
fi
fi
# setup a default TOOL_PATH
TOOL_PATH="${TOOL_PATH:-$HADOOP_PREFIX/share/hadoop/tools/lib/*}"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.file=$HADOOP_LOGFILE"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.home.dir=$HADOOP_PREFIX"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.root.logger=${HADOOP_ROOT_LOGGER:-INFO,console}"
if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
HADOOP_OPTS="$HADOOP_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$JAVA_LIBRARY_PATH
fi
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.policy.file=$HADOOP_POLICYFILE"
# Disable ipv6 as it can cause issues
HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
# put hdfs in classpath if present
if [ "$HADOOP_HDFS_HOME" = "" ]; then
if [ -d "${HADOOP_PREFIX}/$HDFS_DIR" ]; then
export HADOOP_HDFS_HOME=$HADOOP_PREFIX
fi
fi
if [ -d "$HADOOP_HDFS_HOME/$HDFS_DIR/webapps" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/$HDFS_DIR
fi
if [ -d "$HADOOP_HDFS_HOME/$HDFS_LIB_JARS_DIR" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/$HDFS_LIB_JARS_DIR'/*'
fi
CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/$HDFS_DIR'/*'
# put yarn in classpath if present
if [ "$HADOOP_YARN_HOME" = "" ]; then
if [ -d "${HADOOP_PREFIX}/$YARN_DIR" ]; then
export HADOOP_YARN_HOME=$HADOOP_PREFIX
fi
fi
if [ -d "$HADOOP_YARN_HOME/$YARN_DIR/webapps" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/$YARN_DIR
fi
if [ -d "$HADOOP_YARN_HOME/$YARN_LIB_JARS_DIR" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/$YARN_LIB_JARS_DIR'/*'
fi
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/$YARN_DIR'/*'
# put mapred in classpath if present AND different from YARN
if [ "$HADOOP_MAPRED_HOME" = "" ]; then
if [ -d "${HADOOP_PREFIX}/$MAPRED_DIR" ]; then
export HADOOP_MAPRED_HOME=$HADOOP_PREFIX
fi
fi
if [ "$HADOOP_MAPRED_HOME/$MAPRED_DIR" != "$HADOOP_YARN_HOME/$YARN_DIR" ] ; then
if [ -d "$HADOOP_MAPRED_HOME/$MAPRED_DIR/webapps" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/$MAPRED_DIR
fi
if [ -d "$HADOOP_MAPRED_HOME/$MAPRED_LIB_JARS_DIR" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/$MAPRED_LIB_JARS_DIR'/*'
fi
CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/$MAPRED_DIR'/*'
fi
# Add the user-specified CLASSPATH via HADOOP_CLASSPATH
# Add it first or last depending on if user has
# set env-var HADOOP_USER_CLASSPATH_FIRST
if [ "$HADOOP_CLASSPATH" != "" ]; then
# Prefix it if its to be preceded
if [ "$HADOOP_USER_CLASSPATH_FIRST" != "" ]; then
CLASSPATH=${HADOOP_CLASSPATH}:${CLASSPATH}
else
CLASSPATH=${CLASSPATH}:${HADOOP_CLASSPATH}
fi
fi fi
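
The calling convention that every rewritten script above follows can be summed up in a few lines; this is just the pattern already visible in bin/hadoop and bin/rcc, with an assumed install path standing in for HADOOP_LIBEXEC_DIR.

#!/usr/bin/env bash
# How a Hadoop shell script bootstraps itself against the new hadoop-config.sh.
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-/opt/hadoop/libexec}"  # assumed location
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true   # caller promises to run hadoop_finalize itself
if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
  . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
else
  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 1>&2
  exit 1
fi
# ...adjust CLASSPATH / HADOOP_OPTS as needed, then finish the deferred setup:
hadoop_finalize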

View File

@@ -15,200 +15,42 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
function hadoop_usage
{
echo "Usage: hadoop-daemon.sh [--config confdir] (start|stop|status) <hadoop-command> <args...>"
}
# Runs a Hadoop command as a daemon. # let's locate libexec...
# if [[ -n "${HADOOP_PREFIX}" ]]; then
# Environment Variables DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
# else
# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_PREFIX}/conf. this="${BASH_SOURCE-$0}"
# HADOOP_LOG_DIR Where log files are stored. PWD by default. bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
# HADOOP_MASTER host:path where hadoop code should be rsync'd from DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
# HADOOP_PID_DIR The pid files are stored. /tmp by default. fi
# HADOOP_IDENT_STRING A string representing this instance of hadoop. $USER by default
# HADOOP_NICENESS The scheduling priority for daemons. Defaults to 0.
##
usage="Usage: hadoop-daemon.sh [--config <conf-dir>] [--hosts hostlistfile] [--script script] (start|stop) <hadoop-command> <args...>" HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
# if no args specified, show usage HADOOP_NEW_CONFIG=true
if [ $# -le 1 ]; then if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
echo $usage . "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
exit 1 exit 1
fi fi
bin=`dirname "${BASH_SOURCE-$0}"` if [[ $# = 0 ]]; then
bin=`cd "$bin"; pwd` hadoop_exit_with_usage 1
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
# get arguments
#default value
hadoopScript="$HADOOP_PREFIX"/bin/hadoop
if [ "--script" = "$1" ]
then
shift
hadoopScript=$1
shift
fi fi
startStop=$1
shift daemonmode=$1
command=$1
shift shift
hadoop_rotate_log () if [[ -z "${HADOOP_HDFS_HOME}" ]]; then
{ hdfsscript="${HADOOP_PREFIX}/bin/hdfs"
log=$1; else
num=5; hdfsscript="${HADOOP_HDFS_HOME}/bin/hdfs"
if [ -n "$2" ]; then
num=$2
fi
if [ -f "$log" ]; then # rotate logs
while [ $num -gt 1 ]; do
prev=`expr $num - 1`
[ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
num=$prev
done
mv "$log" "$log.$num";
fi
}
if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
. "${HADOOP_CONF_DIR}/hadoop-env.sh"
fi fi
# Determine if we're starting a secure datanode, and if so, redefine appropriate variables exec "$hdfsscript" --config "${HADOOP_CONF_DIR}" --daemon "${daemonmode}" "$@"
if [ "$command" == "datanode" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then
export HADOOP_PID_DIR=$HADOOP_SECURE_DN_PID_DIR
export HADOOP_LOG_DIR=$HADOOP_SECURE_DN_LOG_DIR
export HADOOP_IDENT_STRING=$HADOOP_SECURE_DN_USER
starting_secure_dn="true"
fi
#Determine if we're starting a privileged NFS, if so, redefine the appropriate variables
if [ "$command" == "nfs3" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_PRIVILEGED_NFS_USER" ]; then
export HADOOP_PID_DIR=$HADOOP_PRIVILEGED_NFS_PID_DIR
export HADOOP_LOG_DIR=$HADOOP_PRIVILEGED_NFS_LOG_DIR
export HADOOP_IDENT_STRING=$HADOOP_PRIVILEGED_NFS_USER
starting_privileged_nfs="true"
fi
if [ "$HADOOP_IDENT_STRING" = "" ]; then
export HADOOP_IDENT_STRING="$USER"
fi
# get log directory
if [ "$HADOOP_LOG_DIR" = "" ]; then
export HADOOP_LOG_DIR="$HADOOP_PREFIX/logs"
fi
if [ ! -w "$HADOOP_LOG_DIR" ] ; then
mkdir -p "$HADOOP_LOG_DIR"
chown $HADOOP_IDENT_STRING $HADOOP_LOG_DIR
fi
if [ "$HADOOP_PID_DIR" = "" ]; then
HADOOP_PID_DIR=/tmp
fi
# some variables
export HADOOP_LOGFILE=hadoop-$HADOOP_IDENT_STRING-$command-$HOSTNAME.log
export HADOOP_ROOT_LOGGER=${HADOOP_ROOT_LOGGER:-"INFO,RFA"}
export HADOOP_SECURITY_LOGGER=${HADOOP_SECURITY_LOGGER:-"INFO,RFAS"}
export HDFS_AUDIT_LOGGER=${HDFS_AUDIT_LOGGER:-"INFO,NullAppender"}
log=$HADOOP_LOG_DIR/hadoop-$HADOOP_IDENT_STRING-$command-$HOSTNAME.out
pid=$HADOOP_PID_DIR/hadoop-$HADOOP_IDENT_STRING-$command.pid
HADOOP_STOP_TIMEOUT=${HADOOP_STOP_TIMEOUT:-5}
# Set default scheduling priority
if [ "$HADOOP_NICENESS" = "" ]; then
export HADOOP_NICENESS=0
fi
case $startStop in
(start)
[ -w "$HADOOP_PID_DIR" ] || mkdir -p "$HADOOP_PID_DIR"
if [ -f $pid ]; then
if kill -0 `cat $pid` > /dev/null 2>&1; then
echo $command running as process `cat $pid`. Stop it first.
exit 1
fi
fi
if [ "$HADOOP_MASTER" != "" ]; then
echo rsync from $HADOOP_MASTER
rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $HADOOP_MASTER/ "$HADOOP_PREFIX"
fi
hadoop_rotate_log $log
echo starting $command, logging to $log
cd "$HADOOP_PREFIX"
case $command in
namenode|secondarynamenode|datanode|journalnode|dfs|dfsadmin|fsck|balancer|zkfc)
if [ -z "$HADOOP_HDFS_HOME" ]; then
hdfsScript="$HADOOP_PREFIX"/bin/hdfs
else
hdfsScript="$HADOOP_HDFS_HOME"/bin/hdfs
fi
nohup nice -n $HADOOP_NICENESS $hdfsScript --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
;;
(*)
nohup nice -n $HADOOP_NICENESS $hadoopScript --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
;;
esac
echo $! > $pid
sleep 1
head "$log"
# capture the ulimit output
if [ "true" = "$starting_secure_dn" ]; then
echo "ulimit -a for secure datanode user $HADOOP_SECURE_DN_USER" >> $log
# capture the ulimit info for the appropriate user
su --shell=/bin/bash $HADOOP_SECURE_DN_USER -c 'ulimit -a' >> $log 2>&1
elif [ "true" = "$starting_privileged_nfs" ]; then
echo "ulimit -a for privileged nfs user $HADOOP_PRIVILEGED_NFS_USER" >> $log
su --shell=/bin/bash $HADOOP_PRIVILEGED_NFS_USER -c 'ulimit -a' >> $log 2>&1
else
echo "ulimit -a for user $USER" >> $log
ulimit -a >> $log 2>&1
fi
sleep 3;
if ! ps -p $! > /dev/null ; then
exit 1
fi
;;
(stop)
if [ -f $pid ]; then
TARGET_PID=`cat $pid`
if kill -0 $TARGET_PID > /dev/null 2>&1; then
echo stopping $command
kill $TARGET_PID
sleep $HADOOP_STOP_TIMEOUT
if kill -0 $TARGET_PID > /dev/null 2>&1; then
echo "$command did not stop gracefully after $HADOOP_STOP_TIMEOUT seconds: killing with kill -9"
kill -9 $TARGET_PID
fi
else
echo no $command to stop
fi
rm -f $pid
else
echo no $command to stop
fi
;;
(*)
echo $usage
exit 1
;;
esac
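
In other words, the daemon wrapper is now a straight translation layer onto the hdfs script; the invocations below are equivalent, with paths and the daemon name shown purely as examples run from the install root.

# Old-style invocation, still accepted by the thin wrapper:
#   sbin/hadoop-daemon.sh --config /etc/hadoop start namenode
# What it execs underneath (the equivalent direct form):
#   bin/hdfs --config /etc/hadoop --daemon start namenode
# "stop" and "status" map the same way, e.g. querying a running NameNode:
sbin/hadoop-daemon.sh --config /etc/hadoop status namenode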

View File

@@ -18,19 +18,34 @@
# Run a Hadoop command on all slave hosts. # Run a Hadoop command on all slave hosts.
usage="Usage: hadoop-daemons.sh [--config confdir] [--hosts hostlistfile] [start|stop] command args..." function hadoop_usage
{
echo "Usage: hadoop-daemons.sh [--config confdir] [--hosts hostlistfile] (start|stop|status) <hadoop-command> <args...>"
}
# if no args specified, show usage this="${BASH_SOURCE-$0}"
if [ $# -le 1 ]; then bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
echo $usage
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
exit 1 exit 1
fi fi
bin=`dirname "${BASH_SOURCE-$0}"` if [[ $# = 0 ]]; then
bin=`cd "$bin"; pwd` hadoop_exit_with_usage 1
fi
DEFAULT_LIBEXEC_DIR="$bin"/../libexec hadoop_connect_to_hosts "${bin}/hadoop-daemon.sh" \
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} --config "${HADOOP_CONF_DIR}" "$@"
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
exec "$bin/slaves.sh" --config $HADOOP_CONF_DIR cd "$HADOOP_PREFIX" \; "$bin/hadoop-daemon.sh" --config $HADOOP_CONF_DIR "$@"

File diff suppressed because it is too large

View File

@@ -0,0 +1,93 @@
# Copyright 2014 The Apache Software Foundation
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##
## VENDORS!
##
## This is where you can redefine the layout of Hadoop directories
## and expect to be reasonably compatible. Needless to say, this
## is expert level stuff and one needs to tread carefully.
##
## If you move HADOOP_LIBEXEC_DIR from some location that
## isn't bin/../libexec, you MUST define either HADOOP_LIBEXEC_DIR
## or have HADOOP_PREFIX/libexec/hadoop-config.sh and
## HADOOP_PREFIX/libexec/hadoop-layout.sh (this file) exist.
## NOTE:
##
## hadoop-functions.sh gets executed BEFORE this file. So you can
## redefine all of those functions here.
##
## *-env.sh get executed AFTER this file but generally too late to
## override the settings (but not the functions!) here. However, this
## also means you cannot use things like HADOOP_CONF_DIR for these
## definitions.
####
# Common disk layout
####
# Default location for the common/core Hadoop project
# export HADOOP_COMMON_HOME=$HADOOP_PREFIX
# Relative locations where components under HADOOP_COMMON_HOME are located
# export HADOOP_COMMON_DIR="share/hadoop/common"
# export HADOOP_COMMON_LIB_JARS_DIR="share/hadoop/common/lib"
# export HADOOP_COMMON_LIB_NATIVE_DIR="lib/native"
####
# HDFS disk layout
####
# Default location for the HDFS subproject
# export HADOOP_HDFS_HOME=$HADOOP_PREFIX
# Relative locations where components under HADOOP_HDFS_HOME are located
# export HDFS_DIR="share/hadoop/hdfs"
# export HDFS_LIB_JARS_DIR="share/hadoop/hdfs/lib"
####
# YARN disk layout
####
# Default location for the YARN subproject
# export HADOOP_YARN_HOME=$HADOOP_PREFIX
# Relative locations where components under HADOOP_YARN_HOME are located
# export YARN_DIR="share/hadoop/yarn"
# export YARN_LIB_JARS_DIR="share/hadoop/yarn/lib"
# Default location for the MapReduce subproject
# export HADOOP_MAPRED_HOME=$HADOOP_PREFIX
####
# MapReduce disk layout
####
# Relative locations where components under HADOOP_MAPRED_HOME are located
# export MAPRED_DIR="share/hadoop/mapreduce"
# export MAPRED_LIB_JARS_DIR="share/hadoop/mapreduce/lib"
####
# Misc paths
####
# setup a default TOOL_PATH, where things like distcp lives
# note that this path only gets added for certain commands and not
# part of the general classpath
# export TOOL_PATH="$HADOOP_PREFIX/share/hadoop/tools/lib/*"
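
To make the vendor hook concrete, a hypothetical hadoop-layout.sh for a split, FHS-style package tree might look like the sketch below; every path is illustrative and nothing here is shipped or implied by the commit, only the variable names come from the file above.

# Hypothetical vendor layout: each subproject installed in its own tree.
export HADOOP_COMMON_HOME=/usr/lib/hadoop
export HADOOP_COMMON_DIR="."
export HADOOP_COMMON_LIB_JARS_DIR="lib"
export HADOOP_COMMON_LIB_NATIVE_DIR="lib/native"
export HADOOP_HDFS_HOME=/usr/lib/hadoop-hdfs
export HDFS_DIR="."
export HDFS_LIB_JARS_DIR="lib"
export HADOOP_YARN_HOME=/usr/lib/hadoop-yarn
export YARN_DIR="."
export YARN_LIB_JARS_DIR="lib"
export HADOOP_MAPRED_HOME=/usr/lib/hadoop-mapreduce
export MAPRED_DIR="."
export MAPRED_LIB_JARS_DIR="lib"
export TOOL_PATH="/usr/lib/hadoop/tools/lib/*"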

View File

@@ -15,47 +15,28 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# This script runs the hadoop core commands.
# The Hadoop record compiler this="${BASH_SOURCE-$0}"
# bin=$(cd -P -- "$(dirname -- "$this")" >/dev/null && pwd -P)
# Environment Variables script="$(basename -- "$this")"
# this="$bin/$script"
# JAVA_HOME The java implementation to use. Overrides JAVA_HOME.
#
# HADOOP_OPTS Extra Java runtime options.
#
# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_PREFIX}/conf.
#
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
DEFAULT_LIBEXEC_DIR="$bin"/../libexec DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh HADOOP_NEW_CONFIG=true
. "$HADOOP_LIBEXEC_DIR/hadoop-config.sh"
if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then if [ $# = 0 ]; then
. "${HADOOP_CONF_DIR}/hadoop-env.sh" hadoop_exit_with_usage 1
fi fi
# some Java parameters
if [ "$JAVA_HOME" != "" ]; then
#echo "run java in $JAVA_HOME"
JAVA_HOME=$JAVA_HOME
fi
if [ "$JAVA_HOME" = "" ]; then
echo "Error: JAVA_HOME is not set."
exit 1
fi
JAVA=$JAVA_HOME/bin/java
JAVA_HEAP_MAX=-Xmx1000m
# restore ordinary behaviour
unset IFS
CLASS='org.apache.hadoop.record.compiler.generated.Rcc' CLASS='org.apache.hadoop.record.compiler.generated.Rcc'
# run it # Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS
exec "$JAVA" $HADOOP_OPTS -classpath "$CLASSPATH" $CLASS "$@" HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
hadoop_add_param HADOOP_OPTS Xmx "$JAVA_HEAP_MAX"
hadoop_finalize
export CLASSPATH
hadoop_java_exec rcc "${CLASS}" "$@"

View File

@@ -27,38 +27,33 @@
# HADOOP_SSH_OPTS Options passed to ssh when running remote commands. # HADOOP_SSH_OPTS Options passed to ssh when running remote commands.
## ##
usage="Usage: slaves.sh [--config confdir] command..." function hadoop_usage {
echo "Usage: slaves.sh [--config confdir] command..."
}
# if no args specified, show usage # let's locate libexec...
if [ $# -le 0 ]; then if [[ -n "${HADOOP_PREFIX}" ]]; then
echo $usage DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
exit 1 exit 1
fi fi
bin=`dirname "${BASH_SOURCE-$0}"` # if no args specified, show usage
bin=`cd "$bin"; pwd` if [[ $# -le 0 ]]; then
hadoop_exit_with_usage 1
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
# Where to start the script, see hadoop-config.sh
# (it set up the variables based on command line options)
if [ "$HADOOP_SLAVE_NAMES" != '' ] ; then
SLAVE_NAMES=$HADOOP_SLAVE_NAMES
else
SLAVE_FILE=${HADOOP_SLAVES:-${HADOOP_CONF_DIR}/slaves}
SLAVE_NAMES=$(cat "$SLAVE_FILE" | sed 's/#.*$//;/^$/d')
fi fi
# start the daemons hadoop_connect_to_hosts "$@"
for slave in $SLAVE_NAMES ; do
ssh $HADOOP_SSH_OPTS $slave $"${@// /\\ }" \
2>&1 | sed "s/^/$slave: /" &
if [ "$HADOOP_SLAVE_SLEEP" != "" ]; then
sleep $HADOOP_SLAVE_SLEEP
fi
done
wait

View File

@@ -15,24 +15,38 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
echo "This script is deprecated. Use start-dfs.sh and start-yarn.sh instead."
exit 1
# Start all hadoop daemons. Run this on master node.
echo "This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh"
bin=`dirname "${BASH_SOURCE-$0}"` # let's locate libexec...
bin=`cd "$bin"; pwd` if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
DEFAULT_LIBEXEC_DIR="$bin"/../libexec else
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} this="${BASH_SOURCE-$0}"
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
exit 1
fi
# start hdfs daemons if hdfs is present # start hdfs daemons if hdfs is present
if [ -f "${HADOOP_HDFS_HOME}"/sbin/start-dfs.sh ]; then if [[ -f "${HADOOP_HDFS_HOME}/sbin/start-dfs.sh" ]]; then
"${HADOOP_HDFS_HOME}"/sbin/start-dfs.sh --config $HADOOP_CONF_DIR "${HADOOP_HDFS_HOME}/sbin/start-dfs.sh" --config "${HADOOP_CONF_DIR}"
fi fi
# start yarn daemons if yarn is present # start yarn daemons if yarn is present
if [ -f "${HADOOP_YARN_HOME}"/sbin/start-yarn.sh ]; then if [[ -f "${HADOOP_YARN_HOME}/sbin/start-yarn.sh" ]]; then
"${HADOOP_YARN_HOME}"/sbin/start-yarn.sh --config $HADOOP_CONF_DIR "${HADOOP_YARN_HOME}/sbin/start-yarn.sh" --config "${HADOOP_CONF_DIR}"
fi fi

View File

@@ -18,21 +18,35 @@
# Stop all hadoop daemons. Run this on master node. # Stop all hadoop daemons. Run this on master node.
echo "This script is Deprecated. Instead use stop-dfs.sh and stop-yarn.sh" echo "This script is deprecated. Use stop-dfs.sh and stop-yarn.sh instead."
exit 1
bin=`dirname "${BASH_SOURCE-$0}"` # let's locate libexec...
bin=`cd "$bin"; pwd` if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
DEFAULT_LIBEXEC_DIR="$bin"/../libexec HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} # shellcheck disable=SC2034
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
exit 1
fi
# stop hdfs daemons if hdfs is present # stop hdfs daemons if hdfs is present
if [ -f "${HADOOP_HDFS_HOME}"/sbin/stop-dfs.sh ]; then if [[ -f "${HADOOP_HDFS_HOME}/sbin/stop-dfs.sh" ]]; then
"${HADOOP_HDFS_HOME}"/sbin/stop-dfs.sh --config $HADOOP_CONF_DIR "${HADOOP_HDFS_HOME}/sbin/stop-dfs.sh" --config "${HADOOP_CONF_DIR}"
fi fi
# stop yarn daemons if yarn is present # stop yarn daemons if yarn is present
if [ -f "${HADOOP_HDFS_HOME}"/sbin/stop-yarn.sh ]; then if [[ -f "${HADOOP_HDFS_HOME}/sbin/stop-yarn.sh" ]]; then
"${HADOOP_HDFS_HOME}"/sbin/stop-yarn.sh --config $HADOOP_CONF_DIR "${HADOOP_HDFS_HOME}/sbin/stop-yarn.sh" --config "${HADOOP_CONF_DIR}"
fi fi

View File

@@ -1,3 +1,4 @@
#
# Licensed to the Apache Software Foundation (ASF) under one # Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file # or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information # distributed with this work for additional information
@@ -16,71 +17,393 @@
# Set Hadoop-specific environment variables here. # Set Hadoop-specific environment variables here.
# The only required environment variable is JAVA_HOME. All others are ##
# optional. When running a distributed configuration it is best to ## THIS FILE ACTS AS THE MASTER FILE FOR ALL HADOOP PROJECTS.
# set JAVA_HOME in this file, so that it is correctly defined on ## SETTINGS HERE WILL BE READ BY ALL HADOOP COMMANDS. THEREFORE,
# remote nodes. ## ONE CAN USE THIS FILE TO SET YARN, HDFS, AND MAPREDUCE
## CONFIGURATION OPTIONS INSTEAD OF xxx-env.sh.
##
## Precedence rules:
##
## {yarn-env.sh|hdfs-env.sh} > hadoop-env.sh > hard-coded defaults
##
## {YARN_xyz|HDFS_xyz} > HADOOP_xyz > hard-coded defaults
##
# Many of the options here are built from the perspective that users
# may want to provide OVERWRITING values on the command line.
# For example:
#
# JAVA_HOME=/usr/java/testing hdfs dfs -ls
#
# Therefore, the vast majority (BUT NOT ALL!) of these defaults
# are configured for substitution and not append. If you would
# like append, you'll need to modify this file accordingly.
###
# Generic settings for HADOOP
###
# Technically, the only required environment variable is JAVA_HOME.
# All others are optional. However, our defaults are probably not
# your defaults. Many sites configure these options outside of Hadoop,
# such as in /etc/profile.d
# The java implementation to use. # The java implementation to use.
export JAVA_HOME=${JAVA_HOME} export JAVA_HOME=${JAVA_HOME:-"hadoop-env.sh is not configured"}
# The jsvc implementation to use. Jsvc is required to run secure datanodes. # Location of Hadoop's configuration information. i.e., where this
#export JSVC_HOME=${JSVC_HOME} # file is probably living. You will almost certainly want to set
# this in /etc/profile.d or equivalent.
# export HADOOP_CONF_DIR=$HADOOP_PREFIX/etc/hadoop
export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"} # The maximum amount of heap to use, in MB. Default is 1024.
# export HADOOP_HEAPSIZE=1024
# Extra Java CLASSPATH elements. Automatically insert capacity-scheduler. # Extra Java runtime options for all Hadoop commands. We don't support
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do # IPv6 yet/still, so by default we set preference to IPv4.
if [ "$HADOOP_CLASSPATH" ]; then # export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true"
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
else
export HADOOP_CLASSPATH=$f
fi
done
# The maximum amount of heap to use, in MB. Default is 1000. # Some parts of the shell code may do special things dependent upon
#export HADOOP_HEAPSIZE= # the operating system. We have to set this here. See the next
#export HADOOP_NAMENODE_INIT_HEAPSIZE="" # section as to why....
export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)}
# Extra Java runtime options. Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
MAC_OSX=false # Under certain conditions, Java on OS X will throw SCDynamicStore errors
case "`uname`" in # in the system logs.
Darwin*) MAC_OSX=true;; # See HADOOP-8719 for more information. If you need Kerberos
# support on OS X, you'll want to change/remove this extra bit.
case ${HADOOP_OS_TYPE} in
Darwin*)
export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.realm= "
export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.kdc= "
export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.conf= "
;;
esac esac
if $MAC_OSX; then
export HADOOP_OPTS="$HADOOP_OPTS -Djava.security.krb5.realm= -Djava.security.krb5.kdc="
fi
# Command specific options appended to HADOOP_OPTS when specified # Extra Java runtime options for Hadoop clients (i.e., hdfs dfs -blah)
export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS" # These get added to HADOOP_OPTS for such commands. In most cases,
export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS" # this should be left empty and let users supply it on the
# command line.
# extra HADOOP_CLIENT_OPTS=""
export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS" #
# A note about classpaths.
#
# The classpath is configured such that entries are stripped prior
# to handing to Java based either upon duplication or non-existence.
# Wildcards and/or directories are *NOT* expanded as the
# de-duplication is fairly simple. So if two directories are in
# the classpath that both contain awesome-methods-1.0.jar,
# awesome-methods-1.0.jar will still be seen by java. But if
# the classpath specifically has awesome-methods-1.0.jar from the
# same directory listed twice, the last one will be removed.
#
export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS" # An additional, custom CLASSPATH. This is really meant for
export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS" # end users, but as an administrator, one might want to push
# something extra in here too, such as the jar to the topology
# method. Just be sure to append to the existing HADOOP_USER_CLASSPATH
# so end users have a way to add stuff.
# export HADOOP_USER_CLASSPATH="/some/cool/path/on/your/machine"
# The following applies to multiple commands (fs, dfs, fsck, distcp etc) # Should HADOOP_USER_CLASSPATH be first in the official CLASSPATH?
export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS" # export HADOOP_USER_CLASSPATH_FIRST="yes"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"
# On secure datanodes, user to run the datanode as after dropping privileges ###
export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER} # Options for remote shell connectivity
###
# Where log files are stored. $HADOOP_HOME/logs by default. # There are some optional components of hadoop that allow for
#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER # command and control of remote hosts. For example,
# start-dfs.sh will attempt to bring up all NNs, DNS, etc.
# Where log files are stored in the secure data environment. # Options to pass to SSH when one of the "log into a host and
export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER} # start/stop daemons" scripts is executed
# export HADOOP_SSH_OPTS="-o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=10s"
# The directory where pid files are stored. /tmp by default. # The built-in ssh handler will limit itself to 10 simultaneous connections.
# NOTE: this should be set to a directory that can only be written to by # For pdsh users, this sets the fanout size ( -f )
# the user that will run the hadoop daemons. Otherwise there is the # Change this to increase/decrease as necessary.
# potential for a symlink attack. # export HADOOP_SSH_PARALLEL=10
export HADOOP_PID_DIR=${HADOOP_PID_DIR}
export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR} # Filename which contains all of the hosts for any remote execution
# helper scripts # such as slaves.sh, start-dfs.sh, etc.
# export HADOOP_SLAVES="${HADOOP_CONF_DIR}/slaves"
###
# Options for all daemons
###
#
#
# You can define variables right here and then re-use them later on.
# For example, it is common to use the same garbage collection settings
# for all the daemons. So we could define:
#
# export HADOOP_GC_SETTINGS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
#
# .. and then use it as per the b option under the namenode.
# Where (primarily) daemon log files are stored.
# $HADOOP_PREFIX/logs by default.
# export HADOOP_LOG_DIR=${HADOOP_PREFIX}/logs
# A string representing this instance of hadoop. $USER by default. # A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER # This is used in writing log and pid files, so keep that in mind!
# export HADOOP_IDENT_STRING=$USER
# How many seconds to pause after stopping a daemon
# export HADOOP_STOP_TIMEOUT=5
# Where pid files are stored. /tmp by default.
# export HADOOP_PID_DIR=/tmp
# Default log level and output location
# This sets the hadoop.root.logger property
# export HADOOP_ROOT_LOGGER=INFO,console
# Default log level for daemons spawned explicitly by hadoop-daemon.sh
# This sets the hadoop.root.logger property
# export HADOOP_DAEMON_ROOT_LOGGER=INFO,RFA
# Default log level and output location for security-related messages.
# It sets -Dhadoop.security.logger on the command line.
# You will almost certainly want to change this on a per-daemon basis!
# export HADOOP_SECURITY_LOGGER=INFO,NullAppender
# Default log level for file system audit messages.
# It sets -Dhdfs.audit.logger on the command line.
# You will almost certainly want to change this on a per-daemon basis!
# export HADOOP_AUDIT_LOGGER=INFO,NullAppender
# Default process priority level
# Note that sub-processes will also run at this level!
# export HADOOP_NICENESS=0
# Default name for the service level authorization file
# export HADOOP_POLICYFILE="hadoop-policy.xml"
###
# Secure/privileged execution
###
#
# Out of the box, Hadoop uses jsvc from Apache Commons to launch daemons
# on privileged ports. This functionality can be replaced by providing
# custom functions. See hadoop-functions.sh for more information.
#
# The jsvc implementation to use. Jsvc is required to run secure datanodes.
# export JSVC_HOME=/usr/bin
#
# This directory contains pids for secure and privileged processes.
#export HADOOP_SECURE_PID_DIR=${HADOOP_PID_DIR}
#
# This directory contains the logs for secure and privileged processes.
# export HADOOP_SECURE_LOG=${HADOOP_LOG_DIR}
#
# When running a secure daemon, the default value of HADOOP_IDENT_STRING
# ends up being a bit bogus. Therefore, by default, the code will
# replace HADOOP_IDENT_STRING with HADOOP_SECURE_xx_USER. If you want
# to keep HADOOP_IDENT_STRING untouched, then uncomment this line.
# export HADOOP_SECURE_IDENT_PRESERVE="true"
###
# NameNode specific parameters
###
# Specify the JVM options to be used when starting the NameNode.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# a) Set JMX options
# export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote=true -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=1026"
#
# b) Set garbage collection logs
# export HADOOP_NAMENODE_OPTS="${HADOOP_GC_SETTINGS} -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"
#
# c) ... or set them directly
# export HADOOP_NAMENODE_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"
# this is the default:
# export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender"
###
# SecondaryNameNode specific parameters
###
# Specify the JVM options to be used when starting the SecondaryNameNode.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# This is the default:
# export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender"
###
# DataNode specific parameters
###
# Specify the JVM options to be used when starting the DataNode.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# This is the default:
# export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS"
# On secure datanodes, user to run the datanode as after dropping privileges
# This **MUST** be uncommented to enable secure HDFS!
# export HADOOP_SECURE_DN_USER=hdfs
# Supplemental options for secure datanodes
# By default, we use jsvc which needs to know to launch a
# server jvm.
# export HADOOP_DN_SECURE_EXTRA_OPTS="-jvm server"
# Where datanode log files are stored in the secure data environment.
# export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_SECURE_LOG_DIR}
# Where datanode pid files are stored in the secure data environment.
# export HADOOP_SECURE_DN_PID_DIR=${HADOOP_SECURE_PID_DIR}
###
# NFS3 Gateway specific parameters
###
# Specify the JVM options to be used when starting the NFS3 Gateway.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_NFS3_OPTS=""
# Specify the JVM options to be used when starting the Hadoop portmapper.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_PORTMAP_OPTS="-Xmx512m"
# Supplemental options for privileged gateways
# By default, we use jsvc which needs to know to launch a
# server jvm.
# export HADOOP_NFS3_SECURE_EXTRA_OPTS="-jvm server"
# On privileged gateways, user to run the gateway as after dropping privileges
# export HADOOP_PRIVILEGED_NFS_USER=nfsserver
###
# ZKFailoverController specific parameters
###
# Specify the JVM options to be used when starting the ZKFailoverController.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_ZKFC_OPTS=""
###
# QuorumJournalNode specific parameters
###
# Specify the JVM options to be used when starting the QuorumJournalNode.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_JOURNALNODE_OPTS=""
###
# HDFS Balancer specific parameters
###
# Specify the JVM options to be used when starting the HDFS Balancer.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_BALANCER_OPTS=""
###
# Advanced Users Only!
###
#
# When building Hadoop, you can add the class paths to your commands
# via this special env var:
# HADOOP_ENABLE_BUILD_PATHS="true"
# You can do things like replace parts of the shell underbelly.
# Most of this code is in hadoop-functions.sh.
#
#
# For example, if you want to add compression to the rotation
# method for the .out files that daemons generate, you can do
# that by redefining the hadoop_rotate_log function by
# uncommenting this code block:
#function hadoop_rotate_log
#{
# #
# # log rotation (mainly used for .out files)
# # Users are likely to replace this one for something
# # that gzips or uses dates or who knows what.
# #
# # be aware that &1 and &2 might go through here
# # so don't do anything too crazy...
# #
# local log=$1;
# local num=${2:-5};
#
# if [[ -f "${log}" ]]; then # rotate logs
# while [[ ${num} -gt 1 ]]; do
# #shellcheck disable=SC2086
# let prev=${num}-1
# if [[ -f "${log}.${prev}" ]]; then
# mv "${log}.${prev}" "${log}.${num}"
# fi
# num=${prev}
# done
# mv "${log}" "${log}.${num}"
# gzip -9 "${log}.${num}"
# fi
#}
#
#
# Another example: finding java
#
# By default, Hadoop assumes that $JAVA_HOME is always defined
# outside of its configuration. Eons ago, Apple standardized
# on a helper program called java_home to find it for you.
#
#function hadoop_java_setup
#{
#
# if [[ -z "${JAVA_HOME}" ]]; then
# case $HADOOP_OS_TYPE in
# Darwin*)
# JAVA_HOME=$(/usr/libexec/java_home)
# ;;
# esac
# fi
#
# # Bail if we did not detect it
# if [[ -z "${JAVA_HOME}" ]]; then
# echo "ERROR: JAVA_HOME is not set and could not be found." 1>&2
# exit 1
# fi
#
# if [[ ! -d "${JAVA_HOME}" ]]; then
# echo "ERROR: JAVA_HOME (${JAVA_HOME}) does not exist." 1>&2
# exit 1
# fi
#
# JAVA="${JAVA_HOME}/bin/java"
#
# if [[ ! -x ${JAVA} ]]; then
# echo "ERROR: ${JAVA} is not executable." 1>&2
# exit 1
# fi
# JAVA_HEAP_MAX=-Xmx1g
# HADOOP_HEAPSIZE=${HADOOP_HEAPSIZE:-128}
#
# # check envvars which might override default args
# if [[ -n "$HADOOP_HEAPSIZE" ]]; then
# JAVA_HEAP_MAX="-Xmx${HADOOP_HEAPSIZE}m"
# fi
#}
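#
# Another illustrative override (not a shipped default): rotate the .out
# files by timestamp instead of by number.  Because the function name is
# the same, the rest of the scripts will pick it up automatically.
#
#function hadoop_rotate_log
#{
#  local log=$1
#
#  if [[ -f "${log}" ]]; then
#    mv "${log}" "${log}.$(date +'%Y%m%d%H%M%S')"
#  fi
#}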

View File

@ -110,8 +110,9 @@
* *
* <p>Unless explicitly turned off, Hadoop by default specifies two * <p>Unless explicitly turned off, Hadoop by default specifies two
* resources, loaded in-order from the classpath: <ol> * resources, loaded in-order from the classpath: <ol>
* <li><tt><a href="{@docRoot}/../core-default.html">core-default.xml</a> * <li><tt>
* </tt>: Read-only defaults for hadoop.</li> * <a href="{@docRoot}/../hadoop-project-dist/hadoop-common/core-default.xml">
* core-default.xml</a></tt>: Read-only defaults for hadoop.</li>
* <li><tt>core-site.xml</tt>: Site-specific configuration for a given hadoop * <li><tt>core-site.xml</tt>: Site-specific configuration for a given hadoop
* installation.</li> * installation.</li>
* </ol> * </ol>
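As a quick way to observe the load order described above, Configuration's main()
simply dumps the merged core-default.xml/core-site.xml result to stdout; invoking
it through the generic class-name form of the hadoop command is a sketch, not a
documented workflow:

hadoop org.apache.hadoop.conf.Configuration | head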

View File

@ -129,7 +129,7 @@ Native Libraries Guide
library: library:
---- ----
$ mvn package -Pdist,native -Dskiptests -Dtar $ mvn package -Pdist,native -DskipTests -Dtar
---- ----
You should see the newly-built library in: You should see the newly-built library in:

View File

@ -402,6 +402,9 @@ Release 2.6.0 - UNRELEASED
HDFS-6850. Move NFS out of order write unit tests into TestWrites class. HDFS-6850. Move NFS out of order write unit tests into TestWrites class.
(Zhe Zhang via atm) (Zhe Zhang via atm)
HDFS-6188. An ip whitelist based implementation of TrustedChannelResolver.
(Benoy Antony via Arpit Agarwal)
OPTIMIZATIONS OPTIMIZATIONS
HDFS-6690. Deduplicate xattr names in memory. (wang) HDFS-6690. Deduplicate xattr names in memory. (wang)
@ -514,6 +517,9 @@ Release 2.6.0 - UNRELEASED
HDFS-6825. Edit log corruption due to delayed block removal. HDFS-6825. Edit log corruption due to delayed block removal.
(Yongjun Zhang via wang) (Yongjun Zhang via wang)
HDFS-6569. OOB message can't be sent to the client when DataNode shuts down for upgrade
(brandonli)
Release 2.5.0 - UNRELEASED Release 2.5.0 - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -57,9 +57,9 @@ excludeFilenameRemote=$("$HADOOP_PREFIX/bin/hdfs" getconf -excludeFile)
if [ "$excludeFilenameRemote" = '' ] ; then if [ "$excludeFilenameRemote" = '' ] ; then
echo \ echo \
"Error: hdfs getconf -excludeFile returned empty string, " \ "Error: hdfs getconf -excludeFile returned empty string, " \
"please setup dfs.hosts.exclude in hdfs-site.xml in local cluster " \ "please setup dfs.hosts.exclude in hdfs-site.xml in local cluster " \
"configuration and on all namenodes" "configuration and on all namenodes"
exit 1 exit 1
fi fi
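A quick way to check the prerequisite this message is guarding (the printed path
is only an example):

$ hdfs getconf -excludeFile
/etc/hadoop/conf/dfs.exclude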

View File

@ -15,253 +15,241 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# Environment Variables function hadoop_usage
# {
# JSVC_HOME home directory of jsvc binary. Required for starting secure echo "Usage: hdfs [--config confdir] [--daemon (start|stop|status)] COMMAND"
# datanode.
#
# JSVC_OUTFILE path to jsvc output file. Defaults to
# $HADOOP_LOG_DIR/jsvc.out.
#
# JSVC_ERRFILE path to jsvc error file. Defaults to $HADOOP_LOG_DIR/jsvc.err.
bin=`which $0`
bin=`dirname ${bin}`
bin=`cd "$bin" > /dev/null; pwd`
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
function print_usage(){
echo "Usage: hdfs [--config confdir] COMMAND"
echo " where COMMAND is one of:" echo " where COMMAND is one of:"
echo " dfs run a filesystem command on the file systems supported in Hadoop."
echo " namenode -format format the DFS filesystem"
echo " secondarynamenode run the DFS secondary namenode"
echo " namenode run the DFS namenode"
echo " journalnode run the DFS journalnode"
echo " zkfc run the ZK Failover Controller daemon"
echo " datanode run a DFS datanode"
echo " dfsadmin run a DFS admin client"
echo " haadmin run a DFS HA admin client"
echo " fsck run a DFS filesystem checking utility"
echo " balancer run a cluster balancing utility" echo " balancer run a cluster balancing utility"
echo " jmxget get JMX exported values from NameNode or DataNode." echo " cacheadmin configure the HDFS cache"
echo " oiv apply the offline fsimage viewer to an fsimage" echo " classpath prints the class path needed to get the"
echo " oiv_legacy apply the offline fsimage viewer to an legacy fsimage" echo " Hadoop jar and the required libraries"
echo " oev apply the offline edits viewer to an edits file" echo " datanode run a DFS datanode"
echo " dfs run a filesystem command on the file system"
echo " dfsadmin run a DFS admin client"
echo " fetchdt fetch a delegation token from the NameNode" echo " fetchdt fetch a delegation token from the NameNode"
echo " fsck run a DFS filesystem checking utility"
echo " getconf get config values from configuration" echo " getconf get config values from configuration"
echo " groups get the groups which users belong to" echo " groups get the groups which users belong to"
echo " haadmin run a DFS HA admin client"
echo " jmxget get JMX exported values from NameNode or DataNode."
echo " journalnode run the DFS journalnode"
echo " lsSnapshottableDir list all snapshottable dirs owned by the current user"
echo " Use -help to see options"
echo " namenode run the DFS namenode"
echo " Use -format to initialize the DFS filesystem"
echo " nfs3 run an NFS version 3 gateway"
echo " oev apply the offline edits viewer to an edits file"
echo " oiv apply the offline fsimage viewer to an fsimage"
echo " oiv_legacy apply the offline fsimage viewer to a legacy fsimage"
echo " portmap run a portmap service"
echo " secondarynamenode run the DFS secondary namenode"
echo " snapshotDiff diff two snapshots of a directory or diff the" echo " snapshotDiff diff two snapshots of a directory or diff the"
echo " current directory contents with a snapshot" echo " current directory contents with a snapshot"
echo " lsSnapshottableDir list all snapshottable dirs owned by the current user" echo " zkfc run the ZK Failover Controller daemon"
echo " Use -help to see options"
echo " portmap run a portmap service"
echo " nfs3 run an NFS version 3 gateway"
echo " cacheadmin configure the HDFS cache"
echo " crypto configure HDFS encryption zones" echo " crypto configure HDFS encryption zones"
echo "" echo ""
echo "Most commands print help when invoked w/o parameters." echo "Most commands print help when invoked w/o parameters."
} }
if [ $# = 0 ]; then # let's locate libexec...
print_usage if [[ -n "${HADOOP_PREFIX}" ]]; then
exit DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
exit 1
fi
if [[ $# = 0 ]]; then
hadoop_exit_with_usage 1
fi fi
COMMAND=$1 COMMAND=$1
shift shift
case $COMMAND in case ${COMMAND} in
# usage flags balancer)
--help|-help|-h) CLASS=org.apache.hadoop.hdfs.server.balancer.Balancer
print_usage HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_BALANCER_OPTS}"
;;
cacheadmin)
CLASS=org.apache.hadoop.hdfs.tools.CacheAdmin
;;
classpath)
hadoop_finalize
echo "${CLASSPATH}"
exit exit
;; ;;
crypto)
CLASS=org.apache.hadoop.hdfs.tools.CryptoAdmin
;;
datanode)
daemon="true"
# Determine if we're starting a secure datanode, and
# if so, redefine appropriate variables
if [[ -n "${HADOOP_SECURE_DN_USER}" ]]; then
secure_service="true"
secure_user="${HADOOP_SECURE_DN_USER}"
# backward compatibility
HADOOP_SECURE_PID_DIR="${HADOOP_SECURE_PID_DIR:-$HADOOP_SECURE_DN_PID_DIR}"
HADOOP_SECURE_LOG_DIR="${HADOOP_SECURE_LOG_DIR:-$HADOOP_SECURE_DN_LOG_DIR}"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_DN_SECURE_EXTRA_OPTS} ${HADOOP_DATANODE_OPTS}"
CLASS="org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter"
else
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_DATANODE_OPTS}"
CLASS='org.apache.hadoop.hdfs.server.datanode.DataNode'
fi
;;
dfs)
CLASS=org.apache.hadoop.fs.FsShell
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
dfsadmin)
CLASS=org.apache.hadoop.hdfs.tools.DFSAdmin
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
fetchdt)
CLASS=org.apache.hadoop.hdfs.tools.DelegationTokenFetcher
;;
fsck)
CLASS=org.apache.hadoop.hdfs.tools.DFSck
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
getconf)
CLASS=org.apache.hadoop.hdfs.tools.GetConf
;;
groups)
CLASS=org.apache.hadoop.hdfs.tools.GetGroups
;;
haadmin)
CLASS=org.apache.hadoop.hdfs.tools.DFSHAAdmin
CLASSPATH="${CLASSPATH}:${TOOL_PATH}"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
journalnode)
daemon="true"
CLASS='org.apache.hadoop.hdfs.qjournal.server.JournalNode'
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_JOURNALNODE_OPTS}"
;;
jmxget)
CLASS=org.apache.hadoop.hdfs.tools.JMXGet
;;
lsSnapshottableDir)
CLASS=org.apache.hadoop.hdfs.tools.snapshot.LsSnapshottableDir
;;
namenode)
daemon="true"
CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode'
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NAMENODE_OPTS}"
;;
nfs3)
daemon="true"
if [[ -n "${HADOOP_PRIVILEGED_NFS_USER}" ]]; then
secure_service="true"
secure_user="${HADOOP_PRIVILEGED_NFS_USER}"
# backward compatibility
HADOOP_SECURE_PID_DIR="${HADOOP_SECURE_PID_DIR:-$HADOOP_SECURE_NFS3_PID_DIR}"
HADOOP_SECURE_LOG_DIR="${HADOOP_SECURE_LOG_DIR:-$HADOOP_SECURE_NFS3_LOG_DIR}"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NFS3_SECURE_EXTRA_OPTS} ${HADOOP_NFS3_OPTS}"
CLASS=org.apache.hadoop.hdfs.nfs.nfs3.PrivilegedNfsGatewayStarter
else
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NFS3_OPTS}"
CLASS=org.apache.hadoop.hdfs.nfs.nfs3.Nfs3
fi
;;
oev)
CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
;;
oiv)
CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewerPB
;;
oiv_legacy)
CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer
;;
portmap)
daemon="true"
CLASS=org.apache.hadoop.portmap.Portmap
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_PORTMAP_OPTS}"
;;
secondarynamenode)
daemon="true"
CLASS='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode'
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_SECONDARYNAMENODE_OPTS}"
;;
snapshotDiff)
CLASS=org.apache.hadoop.hdfs.tools.snapshot.SnapshotDiff
;;
zkfc)
daemon="true"
CLASS='org.apache.hadoop.hdfs.tools.DFSZKFailoverController'
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_ZKFC_OPTS}"
;;
-*)
hadoop_exit_with_usage 1
;;
*)
CLASS="${COMMAND}"
;;
esac esac
# Determine if we're starting a secure datanode, and if so, redefine appropriate variables if [[ -n "${secure_service}" ]]; then
if [ "$COMMAND" == "datanode" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then HADOOP_SECURE_USER="${secure_user}"
if [ -n "$JSVC_HOME" ]; then if hadoop_verify_secure_prereq; then
if [ -n "$HADOOP_SECURE_DN_PID_DIR" ]; then hadoop_setup_secure_service
HADOOP_PID_DIR=$HADOOP_SECURE_DN_PID_DIR priv_outfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${COMMAND-$HOSTNAME}.out"
fi priv_errfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${COMMAND-$HOSTNAME}.err"
priv_pidfile="${HADOOP_PID_DIR}/privileged-${HADOOP_IDENT_STRING}-${COMMAND-$HOSTNAME}.pid"
if [ -n "$HADOOP_SECURE_DN_LOG_DIR" ]; then daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out"
HADOOP_LOG_DIR=$HADOOP_SECURE_DN_LOG_DIR daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${COMMAND}.pid"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR"
fi
HADOOP_IDENT_STRING=$HADOOP_SECURE_DN_USER
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING"
starting_secure_dn="true"
else
echo "It looks like you're trying to start a secure DN, but \$JSVC_HOME"\
"isn't set. Falling back to starting insecure DN."
fi fi
fi
# Determine if we're starting a privileged NFS daemon, and if so, redefine appropriate variables
if [ "$COMMAND" == "nfs3" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_PRIVILEGED_NFS_USER" ]; then
if [ -n "$JSVC_HOME" ]; then
if [ -n "$HADOOP_PRIVILEGED_NFS_PID_DIR" ]; then
HADOOP_PID_DIR=$HADOOP_PRIVILEGED_NFS_PID_DIR
fi
if [ -n "$HADOOP_PRIVILEGED_NFS_LOG_DIR" ]; then
HADOOP_LOG_DIR=$HADOOP_PRIVILEGED_NFS_LOG_DIR
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR"
fi
HADOOP_IDENT_STRING=$HADOOP_PRIVILEGED_NFS_USER
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING"
starting_privileged_nfs="true"
else
echo "It looks like you're trying to start a privileged NFS server, but"\
"\$JSVC_HOME isn't set. Falling back to starting unprivileged NFS server."
fi
fi
if [ "$COMMAND" = "namenode" ] ; then
CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode'
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_NAMENODE_OPTS"
elif [ "$COMMAND" = "zkfc" ] ; then
CLASS='org.apache.hadoop.hdfs.tools.DFSZKFailoverController'
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_ZKFC_OPTS"
elif [ "$COMMAND" = "secondarynamenode" ] ; then
CLASS='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode'
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_SECONDARYNAMENODE_OPTS"
elif [ "$COMMAND" = "datanode" ] ; then
CLASS='org.apache.hadoop.hdfs.server.datanode.DataNode'
if [ "$starting_secure_dn" = "true" ]; then
HADOOP_OPTS="$HADOOP_OPTS -jvm server $HADOOP_DATANODE_OPTS"
else
HADOOP_OPTS="$HADOOP_OPTS -server $HADOOP_DATANODE_OPTS"
fi
elif [ "$COMMAND" = "journalnode" ] ; then
CLASS='org.apache.hadoop.hdfs.qjournal.server.JournalNode'
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_JOURNALNODE_OPTS"
elif [ "$COMMAND" = "dfs" ] ; then
CLASS=org.apache.hadoop.fs.FsShell
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "dfsadmin" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.DFSAdmin
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "haadmin" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.DFSHAAdmin
CLASSPATH=${CLASSPATH}:${TOOL_PATH}
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "fsck" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.DFSck
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "balancer" ] ; then
CLASS=org.apache.hadoop.hdfs.server.balancer.Balancer
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_BALANCER_OPTS"
elif [ "$COMMAND" = "jmxget" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.JMXGet
elif [ "$COMMAND" = "oiv" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewerPB
elif [ "$COMMAND" = "oiv_legacy" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer
elif [ "$COMMAND" = "oev" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
elif [ "$COMMAND" = "fetchdt" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.DelegationTokenFetcher
elif [ "$COMMAND" = "getconf" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.GetConf
elif [ "$COMMAND" = "groups" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.GetGroups
elif [ "$COMMAND" = "snapshotDiff" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.snapshot.SnapshotDiff
elif [ "$COMMAND" = "lsSnapshottableDir" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.snapshot.LsSnapshottableDir
elif [ "$COMMAND" = "portmap" ] ; then
CLASS=org.apache.hadoop.portmap.Portmap
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_PORTMAP_OPTS"
elif [ "$COMMAND" = "nfs3" ] ; then
CLASS=org.apache.hadoop.hdfs.nfs.nfs3.Nfs3
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_NFS3_OPTS"
elif [ "$COMMAND" = "cacheadmin" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.CacheAdmin
elif [ "$COMMAND" = "crypto" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.CryptoAdmin
else else
CLASS="$COMMAND" daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}.pid"
fi fi
export CLASSPATH=$CLASSPATH if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then
# shellcheck disable=SC2034
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}" HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
if [[ -n "${secure_service}" ]]; then
# Check to see if we should start a secure datanode # shellcheck disable=SC2034
if [ "$starting_secure_dn" = "true" ]; then HADOOP_LOGFILE="hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log"
if [ "$HADOOP_PID_DIR" = "" ]; then
HADOOP_SECURE_DN_PID="/tmp/hadoop_secure_dn.pid"
else else
HADOOP_SECURE_DN_PID="$HADOOP_PID_DIR/hadoop_secure_dn.pid" # shellcheck disable=SC2034
HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log"
fi fi
fi
JSVC=$JSVC_HOME/jsvc hadoop_add_param HADOOP_OPTS Xmx "${JAVA_HEAP_MAX}"
if [ ! -f $JSVC ]; then hadoop_finalize
echo "JSVC_HOME is not set correctly so jsvc cannot be found. jsvc is required to run secure datanodes. "
echo "Please download and install jsvc from http://archive.apache.org/dist/commons/daemon/binaries/ "\
"and set JSVC_HOME to the directory containing the jsvc binary."
exit
fi
if [[ ! $JSVC_OUTFILE ]]; then export CLASSPATH
JSVC_OUTFILE="$HADOOP_LOG_DIR/jsvc.out"
fi
if [[ ! $JSVC_ERRFILE ]]; then if [[ -n "${daemon}" ]]; then
JSVC_ERRFILE="$HADOOP_LOG_DIR/jsvc.err" if [[ -n "${secure_service}" ]]; then
fi hadoop_secure_daemon_handler \
"${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}"\
exec "$JSVC" \ "${daemon_pidfile}" "${daemon_outfile}" \
-Dproc_$COMMAND -outfile "$JSVC_OUTFILE" \ "${priv_pidfile}" "${priv_outfile}" "${priv_errfile}" "$@"
-errfile "$JSVC_ERRFILE" \
-pidfile "$HADOOP_SECURE_DN_PID" \
-nodetach \
-user "$HADOOP_SECURE_DN_USER" \
-cp "$CLASSPATH" \
$JAVA_HEAP_MAX $HADOOP_OPTS \
org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter "$@"
elif [ "$starting_privileged_nfs" = "true" ] ; then
if [ "$HADOOP_PID_DIR" = "" ]; then
HADOOP_PRIVILEGED_NFS_PID="/tmp/hadoop_privileged_nfs3.pid"
else else
HADOOP_PRIVILEGED_NFS_PID="$HADOOP_PID_DIR/hadoop_privileged_nfs3.pid" hadoop_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}"\
"${daemon_pidfile}" "${daemon_outfile}" "$@"
fi fi
exit $?
JSVC=$JSVC_HOME/jsvc
if [ ! -f $JSVC ]; then
echo "JSVC_HOME is not set correctly so jsvc cannot be found. jsvc is required to run privileged NFS gateways. "
echo "Please download and install jsvc from http://archive.apache.org/dist/commons/daemon/binaries/ "\
"and set JSVC_HOME to the directory containing the jsvc binary."
exit
fi
if [[ ! $JSVC_OUTFILE ]]; then
JSVC_OUTFILE="$HADOOP_LOG_DIR/nfs3_jsvc.out"
fi
if [[ ! $JSVC_ERRFILE ]]; then
JSVC_ERRFILE="$HADOOP_LOG_DIR/nfs3_jsvc.err"
fi
exec "$JSVC" \
-Dproc_$COMMAND -outfile "$JSVC_OUTFILE" \
-errfile "$JSVC_ERRFILE" \
-pidfile "$HADOOP_PRIVILEGED_NFS_PID" \
-nodetach \
-user "$HADOOP_PRIVILEGED_NFS_USER" \
-cp "$CLASSPATH" \
$JAVA_HEAP_MAX $HADOOP_OPTS \
org.apache.hadoop.hdfs.nfs.nfs3.PrivilegedNfsGatewayStarter "$@"
else else
# run it # shellcheck disable=SC2086
exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@" hadoop_java_exec "${COMMAND}" "${CLASS}" "$@"
fi fi
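Taken together with the usage string at the top of the rewritten script, a rough
sketch of how the hdfs command is now driven (the choice of daemon and the
dfsadmin call are only examples):

hdfs --daemon start namenode
hdfs --daemon status namenode
hdfs dfsadmin -report
hdfs --daemon stop namenode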

View File

@ -18,19 +18,67 @@
# included in all the hdfs scripts with source command # included in all the hdfs scripts with source command
# should not be executed directly # should not be executed directly
bin=`which "$0"` function hadoop_subproject_init
bin=`dirname "${bin}"` {
bin=`cd "$bin"; pwd` if [ -e "${HADOOP_CONF_DIR}/hdfs-env.sh" ]; then
. "${HADOOP_CONF_DIR}/hdfs-env.sh"
fi
# at some point in time, someone thought it would be a good idea to
# create separate vars for every subproject. *sigh*
# let's perform some overrides and setup some defaults for bw compat
# this way the common hadoop var's == subproject vars and can be
# used interchangeable from here on out
# ...
# this should get deprecated at some point.
HADOOP_LOG_DIR="${HADOOP_HDFS_LOG_DIR:-$HADOOP_LOG_DIR}"
HADOOP_HDFS_LOG_DIR="${HADOOP_LOG_DIR}"
HADOOP_LOGFILE="${HADOOP_HDFS_LOGFILE:-$HADOOP_LOGFILE}"
HADOOP_HDFS_LOGFILE="${HADOOP_LOGFILE}"
HADOOP_NICENESS=${HADOOP_HDFS_NICENESS:-$HADOOP_NICENESS}
HADOOP_HDFS_NICENESS="${HADOOP_NICENESS}"
HADOOP_STOP_TIMEOUT=${HADOOP_HDFS_STOP_TIMEOUT:-$HADOOP_STOP_TIMEOUT}
HADOOP_HDFS_STOP_TIMEOUT="${HADOOP_STOP_TIMEOUT}"
HADOOP_PID_DIR="${HADOOP_HDFS_PID_DIR:-$HADOOP_PID_DIR}"
HADOOP_HDFS_PID_DIR="${HADOOP_PID_DIR}"
HADOOP_ROOT_LOGGER=${HADOOP_HDFS_ROOT_LOGGER:-$HADOOP_ROOT_LOGGER}
HADOOP_HDFS_ROOT_LOGGER="${HADOOP_ROOT_LOGGER}"
HADOOP_HDFS_HOME="${HADOOP_HDFS_HOME:-$HADOOP_HOME_DIR}"
HADOOP_IDENT_STRING="${HADOOP_HDFS_IDENT_STRING:-$HADOOP_IDENT_STRING}"
HADOOP_HDFS_IDENT_STRING="${HADOOP_IDENT_STRING}"
# turn on the defaults
export HADOOP_NAMENODE_OPTS=${HADOOP_NAMENODE_OPTS:-"-Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender"}
export HADOOP_SECONDARYNAMENODE_OPTS=${HADOOP_SECONDARYNAMENODE_OPTS:-"-Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender"}
export HADOOP_DATANODE_OPTS=${HADOOP_DATANODE_OPTS:-"-Dhadoop.security.logger=ERROR,RFAS"}
export HADOOP_DN_SECURE_EXTRA_OPTS=${HADOOP_DN_SECURE_EXTRA_OPTS:-"-jvm server"}
export HADOOP_NFS3_SECURE_EXTRA_OPTS=${HADOOP_NFS3_SECURE_EXTRA_OPTS:-"-jvm server"}
export HADOOP_PORTMAP_OPTS=${HADOOP_PORTMAP_OPTS:-"-Xmx512m"}
}
DEFAULT_LIBEXEC_DIR="$bin"/../libexec if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} _hd_this="${BASH_SOURCE-$0}"
if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then HADOOP_LIBEXEC_DIR=$(cd -P -- "$(dirname -- "${_hd_this}")" >/dev/null && pwd -P)
. ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh
elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then
. "$HADOOP_COMMON_HOME"/libexec/hadoop-config.sh
elif [ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]; then
. "$HADOOP_HOME"/libexec/hadoop-config.sh
else
echo "Hadoop common not found."
exit
fi fi
if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then
. "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then
. "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh"
elif [ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]; then
. "${HADOOP_HOME}/libexec/hadoop-config.sh"
else
echo "ERROR: Hadoop common not found." 2>&1
exit 1
fi
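Because hadoop_subproject_init sources ${HADOOP_CONF_DIR}/hdfs-env.sh, HDFS-only
overrides can now live in that file instead of hadoop-env.sh. A minimal sketch
with placeholder values; anything left unset falls back to the defaults applied
at the end of hadoop_subproject_init:

# etc/hadoop/hdfs-env.sh
export HADOOP_HDFS_LOG_DIR=/var/log/hadoop-hdfs
export HADOOP_HDFS_NICENESS=5
export HADOOP_HDFS_IDENT_STRING=hdfs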

View File

@ -20,24 +20,40 @@
# This script refreshes all namenodes, it's a simple wrapper # This script refreshes all namenodes, it's a simple wrapper
# for dfsadmin to support multiple namenodes. # for dfsadmin to support multiple namenodes.
bin=`dirname "$0"` # let's locate libexec...
bin=`cd "$bin"; pwd` if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
namenodes=$("$HADOOP_PREFIX/bin/hdfs" getconf -nnRpcAddresses)
if [ "$?" != '0' ] ; then errorFlag='1' ;
else else
for namenode in $namenodes ; do this="${BASH_SOURCE-$0}"
echo "Refreshing namenode [$namenode]" bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
"$HADOOP_PREFIX/bin/hdfs" dfsadmin -fs hdfs://$namenode -refreshNodes DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
if [ "$?" != '0' ] ; then errorFlag='1' ; fi fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
exit 1
fi
namenodes=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -nnRpcAddresses)
if [[ "$?" != '0' ]] ; then
errorFlag='1' ;
else
for namenode in ${namenodes} ; do
echo "Refreshing namenode [${namenode}]"
"${HADOOP_HDFS_HOME}/bin/hdfs" dfsadmin \
-fs hdfs://${namenode} -refreshNodes
if [[ "$?" != '0' ]]; then
errorFlag='1'
fi
done done
fi fi
if [ "$errorFlag" = '1' ] ; then if [[ "${errorFlag}" = '1' ]] ; then
echo "Error: refresh of namenodes failed, see error messages above." echo "Error: refresh of namenodes failed, see error messages above."
exit 1 exit 1
else else
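For a cluster whose getconf -nnRpcAddresses call returns two namenodes, the loop
above expands to roughly the following (host names and port are placeholders):

hdfs dfsadmin -fs hdfs://nn1.example.com:8020 -refreshNodes
hdfs dfsadmin -fs hdfs://nn2.example.com:8020 -refreshNodes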

View File

@ -15,13 +15,31 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
bin=`dirname "${BASH_SOURCE-$0}"` function usage
bin=`cd "$bin"; pwd` {
echo "Usage: start-balancer.sh [--config confdir] [-policy <policy>] [-threshold <threshold>]"
}
DEFAULT_LIBEXEC_DIR="$bin"/../libexec this="${BASH_SOURCE-$0}"
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
exit 1
fi
# Start balancer daemon. # Start balancer daemon.
"$HADOOP_PREFIX"/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script "$bin"/hdfs start balancer $@ exec "${bin}/hadoop-daemon.sh" --config "${HADOOP_CONF_DIR}" start balancer "$@"

View File

@ -20,98 +20,128 @@
# Optinally upgrade or rollback dfs state. # Optinally upgrade or rollback dfs state.
# Run this on master node. # Run this on master node.
usage="Usage: start-dfs.sh [-upgrade|-rollback] [other options such as -clusterId]" function hadoop_usage
{
echo "Usage: start-dfs.sh [-upgrade|-rollback] [-clusterId]"
}
bin=`dirname "${BASH_SOURCE-$0}"` this="${BASH_SOURCE-$0}"
bin=`cd "$bin"; pwd` bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
exit 1
fi
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
# get arguments # get arguments
if [ $# -ge 1 ]; then if [[ $# -ge 1 ]]; then
nameStartOpt="$1" nameStartOpt="$1"
shift shift
case "$nameStartOpt" in case "$nameStartOpt" in
(-upgrade) -upgrade)
;; ;;
(-rollback) -rollback)
dataStartOpt="$nameStartOpt" dataStartOpt="$nameStartOpt"
;; ;;
(*) *)
echo $usage hadoop_exit_with_usage 1
exit 1 ;;
;; esac
esac
fi fi
#Add other possible options #Add other possible options
nameStartOpt="$nameStartOpt $@" nameStartOpt="$nameStartOpt $@"
#--------------------------------------------------------- #---------------------------------------------------------
# namenodes # namenodes
NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -namenodes) NAMENODES=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -namenodes 2>/dev/null)
if [[ -z "${NAMENODES}" ]]; then
NAMENODES=$(hostname)
fi
echo "Starting namenodes on [$NAMENODES]" echo "Starting namenodes on [$NAMENODES]"
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \ "${bin}/hadoop-daemons.sh" \
--config "$HADOOP_CONF_DIR" \ --config "${HADOOP_CONF_DIR}" \
--hostnames "$NAMENODES" \ --hostnames "${NAMENODES}" \
--script "$bin/hdfs" start namenode $nameStartOpt start namenode ${nameStartOpt}
#--------------------------------------------------------- #---------------------------------------------------------
# datanodes (using default slaves file) # datanodes (using default slaves file)
if [ -n "$HADOOP_SECURE_DN_USER" ]; then if [[ -n "${HADOOP_SECURE_DN_USER}" ]] &&
echo \ [[ -z "${HADOOP_SECURE_COMMAND}" ]]; then
"Attempting to start secure cluster, skipping datanodes. " \ echo "ERROR: Attempting to start secure cluster, skipping datanodes. "
"Run start-secure-dns.sh as root to complete startup." echo "Run start-secure-dns.sh as root or configure "
echo "\${HADOOP_SECURE_COMMAND} to complete startup."
else else
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
--config "$HADOOP_CONF_DIR" \ echo "Starting datanodes"
--script "$bin/hdfs" start datanode $dataStartOpt
"${bin}/hadoop-daemons.sh" \
--config "${HADOOP_CONF_DIR}" \
start datanode ${dataStartOpt}
fi fi
#--------------------------------------------------------- #---------------------------------------------------------
# secondary namenodes (if any) # secondary namenodes (if any)
SECONDARY_NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -secondarynamenodes 2>/dev/null) SECONDARY_NAMENODES=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -secondarynamenodes 2>/dev/null)
if [ -n "$SECONDARY_NAMENODES" ]; then if [[ "${SECONDARY_NAMENODES}" == "0.0.0.0" ]]; then
echo "Starting secondary namenodes [$SECONDARY_NAMENODES]" SECONDARY_NAMENODES=$(hostname)
fi
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \ if [[ -n "${SECONDARY_NAMENODES}" ]]; then
--config "$HADOOP_CONF_DIR" \ echo "Starting secondary namenodes [${SECONDARY_NAMENODES}]"
--hostnames "$SECONDARY_NAMENODES" \
--script "$bin/hdfs" start secondarynamenode "${bin}/hadoop-daemons.sh" \
--config "${HADOOP_CONF_DIR}" \
--hostnames "${SECONDARY_NAMENODES}" \
start secondarynamenode
fi fi
#--------------------------------------------------------- #---------------------------------------------------------
# quorumjournal nodes (if any) # quorumjournal nodes (if any)
SHARED_EDITS_DIR=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.namenode.shared.edits.dir 2>&-) SHARED_EDITS_DIR=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -confKey dfs.namenode.shared.edits.dir 2>&-)
case "$SHARED_EDITS_DIR" in case "${SHARED_EDITS_DIR}" in
qjournal://*) qjournal://*)
JOURNAL_NODES=$(echo "$SHARED_EDITS_DIR" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g') JOURNAL_NODES=$(echo "${SHARED_EDITS_DIR}" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g')
echo "Starting journal nodes [$JOURNAL_NODES]" echo "Starting journal nodes [${JOURNAL_NODES}]"
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \ "${bin}/hadoop-daemons.sh" \
--config "$HADOOP_CONF_DIR" \ --config "${HADOOP_CONF_DIR}" \
--hostnames "$JOURNAL_NODES" \ --hostnames "${JOURNAL_NODES}" \
--script "$bin/hdfs" start journalnode ;; start journalnode
;;
esac esac
#--------------------------------------------------------- #---------------------------------------------------------
# ZK Failover controllers, if auto-HA is enabled # ZK Failover controllers, if auto-HA is enabled
AUTOHA_ENABLED=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.ha.automatic-failover.enabled) AUTOHA_ENABLED=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -confKey dfs.ha.automatic-failover.enabled | tr '[:upper:]' '[:lower:]')
if [ "$(echo "$AUTOHA_ENABLED" | tr A-Z a-z)" = "true" ]; then if [[ "${AUTOHA_ENABLED}" = "true" ]]; then
echo "Starting ZK Failover Controllers on NN hosts [$NAMENODES]" echo "Starting ZK Failover Controllers on NN hosts [${NAMENODES}]"
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \ "${bin}/hadoop-daemons.sh" \
--config "$HADOOP_CONF_DIR" \ --config "${HADOOP_CONF_DIR}" \
--hostnames "$NAMENODES" \ --hostnames "${NAMENODES}" \
--script "$bin/hdfs" start zkfc start zkfc
fi fi
# eof # eof
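A sketch of a typical run, including the getconf calls the script relies on to
discover its targets (host lists and the upgrade scenario are illustrative):

hdfs getconf -namenodes
hdfs getconf -secondarynamenodes
sbin/start-dfs.sh
sbin/start-dfs.sh -upgrade    # first start after installing a release with a new layout version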

View File

@ -17,17 +17,33 @@
# Run as root to start secure datanodes in a security-enabled cluster. # Run as root to start secure datanodes in a security-enabled cluster.
usage="Usage (run as root in order to start secure datanodes): start-secure-dns.sh"
bin=`dirname "${BASH_SOURCE-$0}"` function hadoop_usage {
bin=`cd "$bin"; pwd` echo "Usage: start-secure-dns.sh"
}
DEFAULT_LIBEXEC_DIR="$bin"/../libexec this="${BASH_SOURCE-$0}"
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
if [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then # let's locate libexec...
"$HADOOP_PREFIX"/sbin/hadoop-daemons.sh --config $HADOOP_CONF_DIR --script "$bin"/hdfs start datanode $dataStartOpt if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else else
echo $usage DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
exit 1
fi
if [[ "${EUID}" -eq 0 ]] && [[ -n "${HADOOP_SECURE_DN_USER}" ]]; then
exec "${bin}/hadoop-daemons.sh" --config "${HADOOP_CONF_DIR}" start datanode "${dataStartOpt}"
else
hadoop_exit_with_usage 1
fi fi

View File

@ -15,14 +15,32 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
bin=`dirname "${BASH_SOURCE-$0}"` function hadoop_usage
bin=`cd "$bin"; pwd` {
echo "Usage: stop-balancer.sh [--config confdir]"
}
DEFAULT_LIBEXEC_DIR="$bin"/../libexec this="${BASH_SOURCE-$0}"
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
exit 1
fi
# Stop balancer daemon. # Stop balancer daemon.
# Run this on the machine where the balancer is running # Run this on the machine where the balancer is running
"$HADOOP_PREFIX"/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script "$bin"/hdfs stop balancer "${bin}/hadoop-daemon.sh" --config "${HADOOP_CONF_DIR}" stop balancer

View File

@ -15,75 +15,100 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
bin=`dirname "${BASH_SOURCE-$0}"` function hadoop_usage
bin=`cd "$bin"; pwd` {
echo "Usage: start-balancer.sh [--config confdir] [-policy <policy>] [-threshold <threshold>]"
}
DEFAULT_LIBEXEC_DIR="$bin"/../libexec this="${BASH_SOURCE-$0}"
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
exit 1
fi
#--------------------------------------------------------- #---------------------------------------------------------
# namenodes # namenodes
NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -namenodes) NAMENODES=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -namenodes)
echo "Stopping namenodes on [$NAMENODES]" echo "Stopping namenodes on [$NAMENODES]"
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \ "${bin}/hadoop-daemons.sh" \
--config "$HADOOP_CONF_DIR" \ --config "${HADOOP_CONF_DIR}" \
--hostnames "$NAMENODES" \ --hostnames "${NAMENODES}" \
--script "$bin/hdfs" stop namenode stop namenode
#--------------------------------------------------------- #---------------------------------------------------------
# datanodes (using default slaves file) # datanodes (using default slaves file)
if [ -n "$HADOOP_SECURE_DN_USER" ]; then if [[ -n "${HADOOP_SECURE_DN_USER}" ]] &&
[[ -z "${HADOOP_SECURE_COMMAND}" ]]; then
echo \ echo \
"Attempting to stop secure cluster, skipping datanodes. " \ "ERROR: Attempting to stop secure cluster, skipping datanodes. " \
"Run stop-secure-dns.sh as root to complete shutdown." "Run stop-secure-dns.sh as root to complete shutdown."
else else
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
--config "$HADOOP_CONF_DIR" \ echo "Stopping datanodes"
--script "$bin/hdfs" stop datanode
"${bin}/hadoop-daemons.sh" --config "${HADOOP_CONF_DIR}" stop datanode
fi fi
#--------------------------------------------------------- #---------------------------------------------------------
# secondary namenodes (if any) # secondary namenodes (if any)
SECONDARY_NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -secondarynamenodes 2>/dev/null) SECONDARY_NAMENODES=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -secondarynamenodes 2>/dev/null)
if [ -n "$SECONDARY_NAMENODES" ]; then if [[ "${SECONDARY_NAMENODES}" == "0.0.0.0" ]]; then
echo "Stopping secondary namenodes [$SECONDARY_NAMENODES]" SECONDARY_NAMENODES=$(hostname)
fi
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \ if [[ -n "${SECONDARY_NAMENODES}" ]]; then
--config "$HADOOP_CONF_DIR" \ echo "Stopping secondary namenodes [${SECONDARY_NAMENODES}]"
--hostnames "$SECONDARY_NAMENODES" \
--script "$bin/hdfs" stop secondarynamenode "${bin}/hadoop-daemons.sh" \
--config "${HADOOP_CONF_DIR}" \
--hostnames "${SECONDARY_NAMENODES}" \
stop secondarynamenode
fi fi
#--------------------------------------------------------- #---------------------------------------------------------
# quorumjournal nodes (if any) # quorumjournal nodes (if any)
SHARED_EDITS_DIR=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.namenode.shared.edits.dir 2>&-) SHARED_EDITS_DIR=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -confKey dfs.namenode.shared.edits.dir 2>&-)
case "$SHARED_EDITS_DIR" in case "${SHARED_EDITS_DIR}" in
qjournal://*) qjournal://*)
JOURNAL_NODES=$(echo "$SHARED_EDITS_DIR" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g') JOURNAL_NODES=$(echo "${SHARED_EDITS_DIR}" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g')
echo "Stopping journal nodes [$JOURNAL_NODES]" echo "Stopping journal nodes [${JOURNAL_NODES}]"
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \ "${bin}/hadoop-daemons.sh" \
--config "$HADOOP_CONF_DIR" \ --config "${HADOOP_CONF_DIR}" \
--hostnames "$JOURNAL_NODES" \ --hostnames "${JOURNAL_NODES}" \
--script "$bin/hdfs" stop journalnode ;; stop journalnode
;;
esac esac
#--------------------------------------------------------- #---------------------------------------------------------
# ZK Failover controllers, if auto-HA is enabled # ZK Failover controllers, if auto-HA is enabled
AUTOHA_ENABLED=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.ha.automatic-failover.enabled) AUTOHA_ENABLED=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -confKey dfs.ha.automatic-failover.enabled | tr '[:upper:]' '[:lower:]')
if [ "$(echo "$AUTOHA_ENABLED" | tr A-Z a-z)" = "true" ]; then if [[ "${AUTOHA_ENABLED}" = "true" ]]; then
echo "Stopping ZK Failover Controllers on NN hosts [$NAMENODES]" echo "Stopping ZK Failover Controllers on NN hosts [${NAMENODES}]"
"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \ "${bin}/hadoop-daemons.sh" \
--config "$HADOOP_CONF_DIR" \ --config "${HADOOP_CONF_DIR}" \
--hostnames "$NAMENODES" \ --hostnames "${NAMENODES}" \
--script "$bin/hdfs" stop zkfc stop zkfc
fi fi
# eof # eof

View File

@ -17,17 +17,33 @@
# Run as root to start secure datanodes in a security-enabled cluster. # Run as root to start secure datanodes in a security-enabled cluster.
usage="Usage (run as root in order to stop secure datanodes): stop-secure-dns.sh"
bin=`dirname "${BASH_SOURCE-$0}"` function hadoop_usage {
bin=`cd "$bin"; pwd` echo "Usage (run as root in order to stop secure datanodes): stop-secure-dns.sh"
}
DEFAULT_LIBEXEC_DIR="$bin"/../libexec this="${BASH_SOURCE-$0}"
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
if [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then # let's locate libexec...
"$HADOOP_PREFIX"/sbin/hadoop-daemons.sh --config $HADOOP_CONF_DIR --script "$bin"/hdfs stop datanode if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else else
echo $usage DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
exit 1
fi
if [[ "${EUID}" -eq 0 ]] && [[ -n "${HADOOP_SECURE_DN_USER}" ]]; then
"${bin}/hadoop-daemons.sh" --config "${HADOOP_CONF_DIR}" stop datanode
else
hadoop_exit_with_usage 1
fi fi

View File

@ -0,0 +1,119 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.protocol.datatransfer;
import java.net.InetAddress;
import java.net.UnknownHostException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.protocol.datatransfer.TrustedChannelResolver;
import org.apache.hadoop.util.CombinedIPWhiteList;
public class WhitelistBasedTrustedChannelResolver extends TrustedChannelResolver {
private CombinedIPWhiteList whiteListForServer;
private CombinedIPWhiteList whitelistForClient;
private static final String FIXEDWHITELIST_DEFAULT_LOCATION = "/etc/hadoop/fixedwhitelist";
private static final String VARIABLEWHITELIST_DEFAULT_LOCATION = "/etc/hadoop/whitelist";
/**
* Path to the file containing subnets and ip addresses to form the fixed whitelist.
*/
public static final String DFS_DATATRANSFER_SERVER_FIXEDWHITELIST_FILE =
"dfs.datatransfer.server.fixedwhitelist.file";
/**
* Enables/Disables variable whitelist
*/
public static final String DFS_DATATRANSFER_SERVER_VARIABLEWHITELIST_ENABLE =
"dfs.datatransfer.server.variablewhitelist.enable";
/**
* Path to the file containing subnets and ip addresses to form the variable whitelist.
*/
public static final String DFS_DATATRANSFER_SERVER_VARIABLEWHITELIST_FILE =
"dfs.datatransfer.server.variablewhitelist.file";
/**
* time in seconds after which the variable whitelist file is re-checked for updates
*/
public static final String DFS_DATATRANSFER_SERVER_VARIABLEWHITELIST_CACHE_SECS =
"dfs.datatransfer.server.variablewhitelist.cache.secs";
/**
* Path to the file containing subnets and ip addresses to form the fixed whitelist.
*/
public static final String DFS_DATATRANSFER_CLIENT_FIXEDWHITELIST_FILE =
"dfs.datatransfer.client.fixedwhitelist.file";
/**
* Enables/Disables variable whitelist
*/
public static final String DFS_DATATRANSFER_CLIENT_VARIABLEWHITELIST_ENABLE =
"dfs.datatransfer.client.variablewhitelist.enable";
/**
* Path to the file containing subnets and ip addresses to form the variable whitelist.
*/
public static final String DFS_DATATRANSFER_CLIENT_VARIABLEWHITELIST_FILE =
"dfs.datatransfer.client.variablewhitelist.file";
/**
* time in seconds after which the variable whitelist file is re-checked for updates
*/
public static final String DFS_DATATRANSFER_CLIENT_VARIABLEWHITELIST_CACHE_SECS =
"dfs.datatransfer.client.variablewhitelist.cache.secs";
@Override
public void setConf(Configuration conf) {
super.setConf(conf);
String fixedFile = conf.get(DFS_DATATRANSFER_SERVER_FIXEDWHITELIST_FILE,
FIXEDWHITELIST_DEFAULT_LOCATION);
String variableFile = null;
long expiryTime = 0;
if (conf.getBoolean(DFS_DATATRANSFER_SERVER_VARIABLEWHITELIST_ENABLE, false)) {
variableFile = conf.get(DFS_DATATRANSFER_SERVER_VARIABLEWHITELIST_FILE,
VARIABLEWHITELIST_DEFAULT_LOCATION);
expiryTime =
conf.getLong(DFS_DATATRANSFER_SERVER_VARIABLEWHITELIST_CACHE_SECS,3600) * 1000;
}
whiteListForServer = new CombinedIPWhiteList(fixedFile,variableFile,expiryTime);
fixedFile = conf.get(DFS_DATATRANSFER_CLIENT_FIXEDWHITELIST_FILE, fixedFile);
expiryTime = 0;
if (conf.getBoolean(DFS_DATATRANSFER_CLIENT_VARIABLEWHITELIST_ENABLE, false)) {
variableFile = conf.get(DFS_DATATRANSFER_CLIENT_VARIABLEWHITELIST_FILE,variableFile);
expiryTime =
conf.getLong(DFS_DATATRANSFER_CLIENT_VARIABLEWHITELIST_CACHE_SECS,3600) * 1000;
}
whitelistForClient = new CombinedIPWhiteList(fixedFile,variableFile,expiryTime);
}
public boolean isTrusted() {
try {
return whitelistForClient.isIn(InetAddress.getLocalHost().getHostAddress());
} catch (UnknownHostException e) {
return false;
}
}
public boolean isTrusted(InetAddress clientAddress) {
return whiteListForServer.isIn(clientAddress.getHostAddress());
}
}
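A possible deployment of this resolver; the file locations are the class defaults
shown above, the one-entry-per-line format is an assumption based on the javadoc,
and the addresses are placeholders:

cat > /etc/hadoop/fixedwhitelist <<'EOF'
10.10.0.0/16
192.168.1.15
EOF

# matching hdfs-site.xml keys, as defined by the class:
#   dfs.datatransfer.server.fixedwhitelist.file      = /etc/hadoop/fixedwhitelist
#   dfs.datatransfer.server.variablewhitelist.enable = true
#   dfs.datatransfer.server.variablewhitelist.file   = /etc/hadoop/whitelist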

View File

@ -738,7 +738,12 @@ private void manageWriterOsCache(long offsetInBlock) {
LOG.warn("Error managing cache for writer of block " + block, t); LOG.warn("Error managing cache for writer of block " + block, t);
} }
} }
public void sendOOB() throws IOException, InterruptedException {
((PacketResponder) responder.getRunnable()).sendOOBResponse(PipelineAck
.getRestartOOBStatus());
}
void receiveBlock( void receiveBlock(
DataOutputStream mirrOut, // output to next datanode DataOutputStream mirrOut, // output to next datanode
DataInputStream mirrIn, // input from next datanode DataInputStream mirrIn, // input from next datanode
@ -830,9 +835,7 @@ void receiveBlock(
// The worst case is not recovering this RBW replica. // The worst case is not recovering this RBW replica.
// Client will fall back to regular pipeline recovery. // Client will fall back to regular pipeline recovery.
} }
try { try {
((PacketResponder) responder.getRunnable()).
sendOOBResponse(PipelineAck.getRestartOOBStatus());
// Even if the connection is closed after the ack packet is // Even if the connection is closed after the ack packet is
// flushed, the client can react to the connection closure // flushed, the client can react to the connection closure
// first. Insert a delay to lower the chance of client // first. Insert a delay to lower the chance of client
@ -840,8 +843,6 @@ void receiveBlock(
Thread.sleep(1000); Thread.sleep(1000);
} catch (InterruptedException ie) { } catch (InterruptedException ie) {
// It is already going down. Ignore this. // It is already going down. Ignore this.
} catch (IOException ioe) {
LOG.info("Error sending OOB Ack.", ioe);
} }
} }
responder.interrupt(); responder.interrupt();

View File

@ -270,6 +270,7 @@ public static InetSocketAddress createSocketAddr(String target) {
public final static String EMPTY_DEL_HINT = ""; public final static String EMPTY_DEL_HINT = "";
final AtomicInteger xmitsInProgress = new AtomicInteger(); final AtomicInteger xmitsInProgress = new AtomicInteger();
Daemon dataXceiverServer = null; Daemon dataXceiverServer = null;
DataXceiverServer xserver = null;
Daemon localDataXceiverServer = null; Daemon localDataXceiverServer = null;
ShortCircuitRegistry shortCircuitRegistry = null; ShortCircuitRegistry shortCircuitRegistry = null;
ThreadGroup threadGroup = null; ThreadGroup threadGroup = null;
@ -649,8 +650,8 @@ private void initDataXceiver(Configuration conf) throws IOException {
streamingAddr = tcpPeerServer.getStreamingAddr(); streamingAddr = tcpPeerServer.getStreamingAddr();
LOG.info("Opened streaming server at " + streamingAddr); LOG.info("Opened streaming server at " + streamingAddr);
this.threadGroup = new ThreadGroup("dataXceiverServer"); this.threadGroup = new ThreadGroup("dataXceiverServer");
this.dataXceiverServer = new Daemon(threadGroup, xserver = new DataXceiverServer(tcpPeerServer, conf, this);
new DataXceiverServer(tcpPeerServer, conf, this)); this.dataXceiverServer = new Daemon(threadGroup, xserver);
this.threadGroup.setDaemon(true); // auto destroy when empty this.threadGroup.setDaemon(true); // auto destroy when empty
if (conf.getBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, if (conf.getBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY,
@ -1137,6 +1138,11 @@ private void registerMXBean() {
dataNodeInfoBeanName = MBeans.register("DataNode", "DataNodeInfo", this); dataNodeInfoBeanName = MBeans.register("DataNode", "DataNodeInfo", this);
} }
@VisibleForTesting
public DataXceiverServer getXferServer() {
return xserver;
}
@VisibleForTesting @VisibleForTesting
public int getXferPort() { public int getXferPort() {
return streamingAddr.getPort(); return streamingAddr.getPort();
@ -1395,6 +1401,7 @@ public void shutdown() {
// in order to avoid any further acceptance of requests, but the peers // in order to avoid any further acceptance of requests, but the peers
// for block writes are not closed until the clients are notified. // for block writes are not closed until the clients are notified.
if (dataXceiverServer != null) { if (dataXceiverServer != null) {
xserver.sendOOBToPeers();
((DataXceiverServer) this.dataXceiverServer.getRunnable()).kill(); ((DataXceiverServer) this.dataXceiverServer.getRunnable()).kill();
this.dataXceiverServer.interrupt(); this.dataXceiverServer.interrupt();
} }

View File

@ -103,7 +103,8 @@ class DataXceiver extends Receiver implements Runnable {
private long opStartTime; //the start time of receiving an Op private long opStartTime; //the start time of receiving an Op
private final InputStream socketIn; private final InputStream socketIn;
private OutputStream socketOut; private OutputStream socketOut;
private BlockReceiver blockReceiver = null;
/** /**
* Client Name used in previous operation. Not available on first request * Client Name used in previous operation. Not available on first request
* on the socket. * on the socket.
@ -159,6 +160,12 @@ private OutputStream getOutputStream() {
return socketOut; return socketOut;
} }
public void sendOOB() throws IOException, InterruptedException {
LOG.info("Sending OOB to peer: " + peer);
if(blockReceiver!=null)
blockReceiver.sendOOB();
}
/** /**
* Read/write data from/to the DataXceiverServer. * Read/write data from/to the DataXceiverServer.
*/ */
@ -168,7 +175,7 @@ public void run() {
Op op = null; Op op = null;
try { try {
dataXceiverServer.addPeer(peer, Thread.currentThread()); dataXceiverServer.addPeer(peer, Thread.currentThread(), this);
peer.setWriteTimeout(datanode.getDnConf().socketWriteTimeout); peer.setWriteTimeout(datanode.getDnConf().socketWriteTimeout);
InputStream input = socketIn; InputStream input = socketIn;
IOStreamPair saslStreams = datanode.saslServer.receive(peer, socketOut, IOStreamPair saslStreams = datanode.saslServer.receive(peer, socketOut,
@ -584,7 +591,6 @@ public void writeBlock(final ExtendedBlock block,
DataOutputStream mirrorOut = null; // stream to next target DataOutputStream mirrorOut = null; // stream to next target
DataInputStream mirrorIn = null; // reply from next target DataInputStream mirrorIn = null; // reply from next target
Socket mirrorSock = null; // socket to next target Socket mirrorSock = null; // socket to next target
BlockReceiver blockReceiver = null; // responsible for data handling
String mirrorNode = null; // the name:port of next target String mirrorNode = null; // the name:port of next target
String firstBadLink = ""; // first datanode that failed in connection setup String firstBadLink = ""; // first datanode that failed in connection setup
Status mirrorInStatus = SUCCESS; Status mirrorInStatus = SUCCESS;
@ -747,6 +753,7 @@ public void writeBlock(final ExtendedBlock block,
IOUtils.closeStream(replyOut); IOUtils.closeStream(replyOut);
IOUtils.closeSocket(mirrorSock); IOUtils.closeSocket(mirrorSock);
IOUtils.closeStream(blockReceiver); IOUtils.closeStream(blockReceiver);
blockReceiver = null;
} }
//update metrics //update metrics

View File

@ -27,11 +27,11 @@
import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.net.Peer; import org.apache.hadoop.hdfs.net.Peer;
import org.apache.hadoop.hdfs.net.PeerServer; import org.apache.hadoop.hdfs.net.PeerServer;
import org.apache.hadoop.hdfs.server.balancer.Balancer;
import org.apache.hadoop.hdfs.util.DataTransferThrottler; import org.apache.hadoop.hdfs.util.DataTransferThrottler;
import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Daemon; import org.apache.hadoop.util.Daemon;
import com.google.common.annotations.VisibleForTesting;
/** /**
* Server used for receiving/sending a block of data. * Server used for receiving/sending a block of data.
@ -45,6 +45,7 @@ class DataXceiverServer implements Runnable {
private final PeerServer peerServer; private final PeerServer peerServer;
private final DataNode datanode; private final DataNode datanode;
private final HashMap<Peer, Thread> peers = new HashMap<Peer, Thread>(); private final HashMap<Peer, Thread> peers = new HashMap<Peer, Thread>();
private final HashMap<Peer, DataXceiver> peersXceiver = new HashMap<Peer, DataXceiver>();
private boolean closed = false; private boolean closed = false;
/** /**
@ -217,18 +218,38 @@ void kill() {
} }
} }
synchronized void addPeer(Peer peer, Thread t) throws IOException { synchronized void addPeer(Peer peer, Thread t, DataXceiver xceiver)
throws IOException {
if (closed) { if (closed) {
throw new IOException("Server closed."); throw new IOException("Server closed.");
} }
peers.put(peer, t); peers.put(peer, t);
peersXceiver.put(peer, xceiver);
} }
synchronized void closePeer(Peer peer) { synchronized void closePeer(Peer peer) {
peers.remove(peer); peers.remove(peer);
peersXceiver.remove(peer);
IOUtils.cleanup(null, peer); IOUtils.cleanup(null, peer);
} }
// Sending OOB to all peers
public synchronized void sendOOBToPeers() {
if (!datanode.shutdownForUpgrade) {
return;
}
for (Peer p : peers.keySet()) {
try {
peersXceiver.get(p).sendOOB();
} catch (IOException e) {
LOG.warn("Got error when sending OOB message.", e);
} catch (InterruptedException e) {
LOG.warn("Interrupted when sending OOB message.");
}
}
}
// Notify all peers of the shutdown and restart. // Notify all peers of the shutdown and restart.
// datanode.shouldRun should still be true and datanode.restarting should // datanode.shouldRun should still be true and datanode.restarting should
// be set true before calling this method. // be set true before calling this method.
@ -247,6 +268,7 @@ synchronized void closeAllPeers() {
IOUtils.cleanup(LOG, p); IOUtils.cleanup(LOG, p);
} }
peers.clear(); peers.clear();
peersXceiver.clear();
} }
// Return the number of peers. // Return the number of peers.
@ -254,7 +276,14 @@ synchronized int getNumPeers() {
return peers.size(); return peers.size();
} }
// Return the number of peer-to-DataXceiver mappings.
@VisibleForTesting
synchronized int getNumPeersXceiver() {
return peersXceiver.size();
}
synchronized void releasePeer(Peer peer) { synchronized void releasePeer(Peer peer) {
peers.remove(peer); peers.remove(peer);
peersXceiver.remove(peer);
} }
} }

View File

@ -27,11 +27,14 @@
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.util.Random;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.DFSOutputStream;
import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.HdfsConfiguration;
@ -67,6 +70,7 @@ public class TestDataNodeRollingUpgrade {
private void startCluster() throws IOException { private void startCluster() throws IOException {
conf = new HdfsConfiguration(); conf = new HdfsConfiguration();
conf.setInt("dfs.blocksize", 1024*1024);
cluster = new Builder(conf).numDataNodes(REPL_FACTOR).build(); cluster = new Builder(conf).numDataNodes(REPL_FACTOR).build();
cluster.waitActive(); cluster.waitActive();
fs = cluster.getFileSystem(); fs = cluster.getFileSystem();
@ -243,4 +247,48 @@ public void testDatanodeRollingUpgradeWithRollback() throws Exception {
shutdownCluster(); shutdownCluster();
} }
} }
@Test (timeout=600000)
// Test that DataXceiverServer tracks the correct peer-to-DataXceiver pairs for sending OOB messages
public void testDatanodePeersXceiver() throws Exception {
try {
startCluster();
// Create files in DFS.
String testFile1 = "/TestDataNodeXceiver1.dat";
String testFile2 = "/TestDataNodeXceiver2.dat";
String testFile3 = "/TestDataNodeXceiver3.dat";
DFSClient client1 = new DFSClient(NameNode.getAddress(conf), conf);
DFSClient client2 = new DFSClient(NameNode.getAddress(conf), conf);
DFSClient client3 = new DFSClient(NameNode.getAddress(conf), conf);
DFSOutputStream s1 = (DFSOutputStream) client1.create(testFile1, true);
DFSOutputStream s2 = (DFSOutputStream) client2.create(testFile2, true);
DFSOutputStream s3 = (DFSOutputStream) client3.create(testFile3, true);
byte[] toWrite = new byte[1024*1024*8];
Random rb = new Random(1111);
rb.nextBytes(toWrite);
s1.write(toWrite, 0, 1024*1024*8);
s1.flush();
s2.write(toWrite, 0, 1024*1024*8);
s2.flush();
s3.write(toWrite, 0, 1024*1024*8);
s3.flush();
assertTrue(dn.getXferServer().getNumPeers() == dn.getXferServer()
.getNumPeersXceiver());
s1.close();
s2.close();
s3.close();
assertTrue(dn.getXferServer().getNumPeers() == dn.getXferServer()
.getNumPeersXceiver());
client1.close();
client2.close();
client3.close();
} finally {
shutdownCluster();
}
}
} }

View File

@ -233,6 +233,9 @@ Release 2.6.0 - UNRELEASED
MAPREDUCE-6036. TestJobEndNotifier fails intermittently in branch-2 (chang MAPREDUCE-6036. TestJobEndNotifier fails intermittently in branch-2 (chang
li via jlowe) li via jlowe)
MAPREDUCE-6012. DBInputSplit creates invalid ranges on Oracle.
(Wei Yan via kasha)
Release 2.5.0 - UNRELEASED Release 2.5.0 - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -15,138 +15,129 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
bin=`which $0` function hadoop_usage
bin=`dirname ${bin}` {
bin=`cd "$bin" > /dev/null; pwd` echo "Usage: mapred [--config confdir] [--daemon (start|stop|status)] COMMAND"
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
if [ -e ${HADOOP_LIBEXEC_DIR}/mapred-config.sh ]; then
. ${HADOOP_LIBEXEC_DIR}/mapred-config.sh
else
. "$bin/mapred-config.sh"
fi
function print_usage(){
echo "Usage: mapred [--config confdir] COMMAND"
echo " where COMMAND is one of:" echo " where COMMAND is one of:"
echo " pipes run a Pipes job"
echo " job manipulate MapReduce jobs" echo " archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive"
echo " queue get information regarding JobQueues"
echo " classpath prints the class path needed for running" echo " classpath prints the class path needed for running"
echo " mapreduce subcommands" echo " mapreduce subcommands"
echo " historyserver run job history servers as a standalone daemon"
echo " distcp <srcurl> <desturl> copy file or directories recursively" echo " distcp <srcurl> <desturl> copy file or directories recursively"
echo " archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive" echo " job manipulate MapReduce jobs"
echo " hsadmin job history server admin interface" echo " historyserver run job history servers as a standalone daemon"
echo " pipes run a Pipes job"
echo " queue get information regarding JobQueues"
echo " sampler sampler"
echo "" echo ""
echo "Most commands print help when invoked w/o parameters." echo "Most commands print help when invoked w/o parameters."
} }
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
# let's locate libexec...
if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/mapred-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/mapred-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/mapred-config.sh." 2>&1
exit 1
fi
if [ $# = 0 ]; then if [ $# = 0 ]; then
print_usage hadoop_exit_with_usage 1
exit
fi fi
COMMAND=$1 COMMAND=$1
shift shift
case $COMMAND in case ${COMMAND} in
# usage flags mradmin|jobtracker|tasktracker|groups)
--help|-help|-h) echo "Sorry, the ${COMMAND} command is no longer supported."
print_usage echo "You may find similar functionality with the \"yarn\" shell command."
exit hadoop_exit_with_usage 1
;; ;;
archive)
CLASS=org.apache.hadoop.tools.HadoopArchives
hadoop_add_classpath "${TOOL_PATH}"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
classpath)
hadoop_finalize
echo "${CLASSPATH}"
exit 0
;;
distcp)
CLASS=org.apache.hadoop.tools.DistCp
hadoop_add_classpath "${TOOL_PATH}"
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
historyserver)
daemon="true"
CLASS=org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_JOB_HISTORYSERVER_OPTS}"
if [ -n "${HADOOP_JOB_HISTORYSERVER_HEAPSIZE}" ]; then
JAVA_HEAP_MAX="-Xmx${HADOOP_JOB_HISTORYSERVER_HEAPSIZE}m"
fi
HADOOP_DAEMON_ROOT_LOGGER=${HADOOP_JHS_LOGGER:-$HADOOP_DAEMON_ROOT_LOGGER}
;;
job)
CLASS=org.apache.hadoop.mapred.JobClient
;;
pipes)
CLASS=org.apache.hadoop.mapred.pipes.Submitter
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
queue)
CLASS=org.apache.hadoop.mapred.JobQueueClient
;;
sampler)
CLASS=org.apache.hadoop.mapred.lib.InputSampler
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
;;
-*|*)
hadoop_exit_with_usage 1
;;
esac esac
if [ "$COMMAND" = "job" ] ; then daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out"
CLASS=org.apache.hadoop.mapred.JobClient daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}.pid"
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "queue" ] ; then
CLASS=org.apache.hadoop.mapred.JobQueueClient if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" # shellcheck disable=SC2034
elif [ "$COMMAND" = "pipes" ] ; then HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
CLASS=org.apache.hadoop.mapred.pipes.Submitter hadoop_add_param HADOOP_OPTS mapred.jobsummary.logger "-Dmapred.jobsummary.logger=${HADOOP_ROOT_LOGGER}"
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" # shellcheck disable=SC2034
elif [ "$COMMAND" = "sampler" ] ; then HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log"
CLASS=org.apache.hadoop.mapred.lib.InputSampler
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "classpath" ] ; then
echo -n
elif [ "$COMMAND" = "historyserver" ] ; then
CLASS=org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer
HADOOP_OPTS="$HADOOP_OPTS -Dmapred.jobsummary.logger=${HADOOP_JHS_LOGGER:-INFO,console} $HADOOP_JOB_HISTORYSERVER_OPTS"
if [ "$HADOOP_JOB_HISTORYSERVER_HEAPSIZE" != "" ]; then
JAVA_HEAP_MAX="-Xmx""$HADOOP_JOB_HISTORYSERVER_HEAPSIZE""m"
fi
elif [ "$COMMAND" = "mradmin" ] \
|| [ "$COMMAND" = "jobtracker" ] \
|| [ "$COMMAND" = "tasktracker" ] \
|| [ "$COMMAND" = "groups" ] ; then
echo "Sorry, the $COMMAND command is no longer supported."
echo "You may find similar functionality with the \"yarn\" shell command."
print_usage
exit 1
elif [ "$COMMAND" = "distcp" ] ; then
CLASS=org.apache.hadoop.tools.DistCp
CLASSPATH=${CLASSPATH}:${TOOL_PATH}
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "archive" ] ; then
CLASS=org.apache.hadoop.tools.HadoopArchives
CLASSPATH=${CLASSPATH}:${TOOL_PATH}
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "hsadmin" ] ; then
CLASS=org.apache.hadoop.mapreduce.v2.hs.client.HSAdmin
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
else
echo $COMMAND - invalid command
print_usage
exit 1
fi fi
# for developers, add mapred classes to CLASSPATH hadoop_add_param HADOOP_OPTS Xmx "${JAVA_HEAP_MAX}"
if [ -d "$HADOOP_MAPRED_HOME/build/classes" ]; then hadoop_finalize
CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build/classes
fi
if [ -d "$HADOOP_MAPRED_HOME/build/webapps" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build
fi
if [ -d "$HADOOP_MAPRED_HOME/build/test/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build/test/classes
fi
if [ -d "$HADOOP_MAPRED_HOME/build/tools" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build/tools
fi
# for releases, add core mapred jar & webapps to CLASSPATH
if [ -d "$HADOOP_PREFIX/${MAPRED_DIR}/webapps" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/${MAPRED_DIR}
fi
for f in $HADOOP_MAPRED_HOME/${MAPRED_DIR}/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
# Need YARN jars also
for f in $HADOOP_YARN_HOME/${YARN_DIR}/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
# add libs to CLASSPATH
for f in $HADOOP_MAPRED_HOME/${MAPRED_LIB_JARS_DIR}/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
# add modules to CLASSPATH
for f in $HADOOP_MAPRED_HOME/modules/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
if [ "$COMMAND" = "classpath" ] ; then
echo $CLASSPATH
exit
fi
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}"
export CLASSPATH export CLASSPATH
exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"
if [[ -n "${daemon}" ]]; then
if [[ -n "${secure_service}" ]]; then
hadoop_secure_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}"\
"${CLASS}" "${daemon_pidfile}" "${daemon_outfile}" \
"${priv_pidfile}" "${priv_outfile}" "${priv_errfile}" "$@"
else
hadoop_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}" \
"${daemon_pidfile}" "${daemon_outfile}" "$@"
fi
exit $?
else
hadoop_java_exec "${COMMAND}" "${CLASS}" "$@"
fi
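For illustration only (not part of the patch): with the dispatch above, daemon lifecycle is expected to be driven directly from bin/mapred via the --daemon flag described in hadoop_usage, roughly as sketched below.
  # a minimal usage sketch, assuming the --daemon handling shown above
  bin/mapred --daemon start historyserver   # run the JobHistoryServer in the background
  bin/mapred --daemon status historyserver  # query it via the pid file under HADOOP_PID_DIR
  bin/mapred --daemon stop historyserver
  bin/mapred job -list                      # non-daemon commands still run in the foreground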

View File

@ -18,35 +18,55 @@
# included in all the mapred scripts with source command # included in all the mapred scripts with source command
# should not be executed directly # should not be executed directly
bin=`which "$0"` function hadoop_subproject_init
bin=`dirname "${bin}"` {
bin=`cd "$bin"; pwd` if [ -e "${HADOOP_CONF_DIR}/mapred-env.sh" ]; then
. "${HADOOP_CONF_DIR}/mapred-env.sh"
fi
# at some point in time, someone thought it would be a good idea to
# create separate vars for every subproject. *sigh*
# let's perform some overrides and setup some defaults for bw compat
# this way the common hadoop var's == subproject vars and can be
# used interchangeable from here on out
# ...
# this should get deprecated at some point.
HADOOP_LOG_DIR="${HADOOP_MAPRED_LOG_DIR:-$HADOOP_LOG_DIR}"
HADOOP_MAPRED_LOG_DIR="${HADOOP_LOG_DIR}"
HADOOP_LOGFILE="${HADOOP_MAPRED_LOGFILE:-$HADOOP_LOGFILE}"
HADOOP_MAPRED_LOGFILE="${HADOOP_LOGFILE}"
HADOOP_NICENESS="${HADOOP_MAPRED_NICENESS:-$HADOOP_NICENESS}"
HADOOP_MAPRED_NICENESS="${HADOOP_NICENESS}"
HADOOP_STOP_TIMEOUT="${HADOOP_MAPRED_STOP_TIMEOUT:-$HADOOP_STOP_TIMEOUT}"
HADOOP_MAPRED_STOP_TIMEOUT="${HADOOP_STOP_TIMEOUT}"
HADOOP_PID_DIR="${HADOOP_MAPRED_PID_DIR:-$HADOOP_PID_DIR}"
HADOOP_MAPRED_PID_DIR="${HADOOP_PID_DIR}"
HADOOP_ROOT_LOGGER="${HADOOP_MAPRED_ROOT_LOGGER:-INFO,console}"
HADOOP_MAPRED_ROOT_LOGGER="${HADOOP_ROOT_LOGGER}"
HADOOP_MAPRED_HOME="${HADOOP_MAPRED_HOME:-$HADOOP_HOME_DIR}"
HADOOP_IDENT_STRING="${HADOOP_MAPRED_IDENT_STRING:-$HADOOP_IDENT_STRING}"
HADOOP_MAPRED_IDENT_STRING="${HADOOP_IDENT_STRING}"
}
DEFAULT_LIBEXEC_DIR="$bin"/../libexec if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} _mc_this="${BASH_SOURCE-$0}"
if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then HADOOP_LIBEXEC_DIR=$(cd -P -- "$(dirname -- "${_mc_this}")" >/dev/null && pwd -P)
fi
if [[ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then elif [[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]]; then
. "$HADOOP_COMMON_HOME"/libexec/hadoop-config.sh . "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh"
elif [ -e "${HADOOP_COMMON_HOME}/bin/hadoop-config.sh" ]; then elif [[ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]]; then
. "$HADOOP_COMMON_HOME"/bin/hadoop-config.sh . "${HADOOP_HOME}/libexec/hadoop-config.sh"
elif [ -e "${HADOOP_HOME}/bin/hadoop-config.sh" ]; then
. "$HADOOP_HOME"/bin/hadoop-config.sh
elif [ -e "${HADOOP_MAPRED_HOME}/bin/hadoop-config.sh" ]; then
. "$HADOOP_MAPRED_HOME"/bin/hadoop-config.sh
else else
echo "Hadoop common not found." echo "Hadoop common not found."
exit exit
fi fi
# Only set locally to use in HADOOP_OPTS. No need to export.
# The following defaults are useful when somebody directly invokes bin/mapred.
HADOOP_MAPRED_LOG_DIR=${HADOOP_MAPRED_LOG_DIR:-${HADOOP_MAPRED_HOME}/logs}
HADOOP_MAPRED_LOGFILE=${HADOOP_MAPRED_LOGFILE:-hadoop.log}
HADOOP_MAPRED_ROOT_LOGGER=${HADOOP_MAPRED_ROOT_LOGGER:-INFO,console}
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_MAPRED_LOG_DIR"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.file=$HADOOP_MAPRED_LOGFILE"
export HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.root.logger=${HADOOP_MAPRED_ROOT_LOGGER}"

View File

@ -15,133 +15,32 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
function hadoop_usage
{
echo "Usage: mr-jobhistory-daemon.sh [--config confdir] (start|stop|status) <hadoop-command> <args...>"
}
# # let's locate libexec...
# Environment Variables if [[ -n "${HADOOP_PREFIX}" ]]; then
# DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
# HADOOP_JHS_LOGGER Hadoop JobSummary logger. else
# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_MAPRED_HOME}/conf. this="${BASH_SOURCE-$0}"
# HADOOP_MAPRED_PID_DIR The pid files are stored. /tmp by default. bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
# HADOOP_MAPRED_NICENESS The scheduling priority for daemons. Defaults to 0. DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
## fi
usage="Usage: mr-jobhistory-daemon.sh [--config <conf-dir>] (start|stop) <mapred-command> " HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
# if no args specified, show usage HADOOP_NEW_CONFIG=true
if [ $# -le 1 ]; then if [[ -f "${HADOOP_LIBEXEC_DIR}/mapred-config.sh" ]]; then
echo $usage . "${HADOOP_LIBEXEC_DIR}/mapred-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
exit 1 exit 1
fi fi
bin=`dirname "${BASH_SOURCE-$0}"` daemonmode=$1
bin=`cd "$bin"; pwd`
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
if [ -e ${HADOOP_LIBEXEC_DIR}/mapred-config.sh ]; then
. $HADOOP_LIBEXEC_DIR/mapred-config.sh
fi
# get arguments
startStop=$1
shift
command=$1
shift shift
hadoop_rotate_log () exec "${HADOOP_MAPRED_HOME}/bin/mapred" \
{ --config "${HADOOP_CONF_DIR}" --daemon "${daemonmode}" "$@"
log=$1;
num=5;
if [ -n "$2" ]; then
num=$2
fi
if [ -f "$log" ]; then # rotate logs
while [ $num -gt 1 ]; do
prev=`expr $num - 1`
[ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
num=$prev
done
mv "$log" "$log.$num";
fi
}
if [ "$HADOOP_MAPRED_IDENT_STRING" = "" ]; then
export HADOOP_MAPRED_IDENT_STRING="$USER"
fi
export HADOOP_MAPRED_HOME=${HADOOP_MAPRED_HOME:-${HADOOP_PREFIX}}
export HADOOP_MAPRED_LOGFILE=mapred-$HADOOP_MAPRED_IDENT_STRING-$command-$HOSTNAME.log
export HADOOP_MAPRED_ROOT_LOGGER=${HADOOP_MAPRED_ROOT_LOGGER:-INFO,RFA}
export HADOOP_JHS_LOGGER=${HADOOP_JHS_LOGGER:-INFO,JSA}
if [ -f "${HADOOP_CONF_DIR}/mapred-env.sh" ]; then
. "${HADOOP_CONF_DIR}/mapred-env.sh"
fi
mkdir -p "$HADOOP_MAPRED_LOG_DIR"
chown $HADOOP_MAPRED_IDENT_STRING $HADOOP_MAPRED_LOG_DIR
if [ "$HADOOP_MAPRED_PID_DIR" = "" ]; then
HADOOP_MAPRED_PID_DIR=/tmp
fi
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_MAPRED_IDENT_STRING"
log=$HADOOP_MAPRED_LOG_DIR/mapred-$HADOOP_MAPRED_IDENT_STRING-$command-$HOSTNAME.out
pid=$HADOOP_MAPRED_PID_DIR/mapred-$HADOOP_MAPRED_IDENT_STRING-$command.pid
HADOOP_MAPRED_STOP_TIMEOUT=${HADOOP_MAPRED_STOP_TIMEOUT:-5}
# Set default scheduling priority
if [ "$HADOOP_MAPRED_NICENESS" = "" ]; then
export HADOOP_MAPRED_NICENESS=0
fi
case $startStop in
(start)
mkdir -p "$HADOOP_MAPRED_PID_DIR"
if [ -f $pid ]; then
if kill -0 `cat $pid` > /dev/null 2>&1; then
echo $command running as process `cat $pid`. Stop it first.
exit 1
fi
fi
hadoop_rotate_log $log
echo starting $command, logging to $log
cd "$HADOOP_MAPRED_HOME"
nohup nice -n $HADOOP_MAPRED_NICENESS "$HADOOP_MAPRED_HOME"/bin/mapred --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
echo $! > $pid
sleep 1; head "$log"
;;
(stop)
if [ -f $pid ]; then
TARGET_PID=`cat $pid`
if kill -0 $TARGET_PID > /dev/null 2>&1; then
echo stopping $command
kill $TARGET_PID
sleep $HADOOP_MAPRED_STOP_TIMEOUT
if kill -0 $TARGET_PID > /dev/null 2>&1; then
echo "$command did not stop gracefully after $HADOOP_MAPRED_STOP_TIMEOUT seconds: killing with kill -9"
kill -9 $TARGET_PID
fi
else
echo no $command to stop
fi
rm -f $pid
else
echo no $command to stop
fi
;;
(*)
echo $usage
exit 1
;;
esac

View File

@ -13,15 +13,59 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# export JAVA_HOME=/home/y/libexec/jdk1.6.0/ ##
## THIS FILE ACTS AS AN OVERRIDE FOR hadoop-env.sh FOR ALL
## WORK DONE BY THE mapred AND RELATED COMMANDS.
##
## Precedence rules:
##
## mapred-env.sh > hadoop-env.sh > hard-coded defaults
##
## MAPRED_xyz > HADOOP_xyz > hard-coded defaults
##
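A small illustrative sketch (not part of the patch) of the precedence described above; the paths are hypothetical, and the variable mapping relies on hadoop_subproject_init in mapred-config.sh from this same change.
  # in mapred-env.sh: MAPRED-prefixed values win, but only for mapred commands
  export HADOOP_MAPRED_LOG_DIR=/var/log/hadoop-mapreduce   # hypothetical path; overrides HADOOP_LOG_DIR
  export HADOOP_MAPRED_NICENESS=10                         # overrides HADOOP_NICENESS for mapred daemons
  # anything left unset here falls back to hadoop-env.sh, then to the hard-coded defaults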
export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000 ###
# Generic settings for MapReduce
###
export HADOOP_MAPRED_ROOT_LOGGER=INFO,RFA #Override the log4j settings for all MR apps
# export HADOOP_MAPRED_ROOT_LOGGER="INFO,console"
# Override Hadoop's log directory & file
# export HADOOP_MAPRED_LOG_DIR=""
# Override Hadoop's pid directory
# export HADOOP_MAPRED_PID_DIR=
# Override Hadoop's identity string. $USER by default.
# This is used in writing log and pid files, so keep that in mind!
# export HADOOP_MAPRED_IDENT_STRING=$USER
# Override Hadoop's process priority
# Note that sub-processes will also run at this level!
# export HADOOP_MAPRED_NICENESS=0
###
# Job History Server specific parameters
###
# Specify the max heapsize for the Job History Server using a numerical value
# in the scale of MB. For example, to specify a JVM option of -Xmx1000m, set
# the value to 1000.
# This value will be overridden by an Xmx setting specified in either
# MAPRED_OPTS, HADOOP_OPTS, and/or HADOOP_JOB_HISTORYSERVER_OPTS.
# If not specified, the default heap size configured via hadoop-env.sh applies.
#
#export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000
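For example (illustrative, not part of the patch), based on how bin/mapred consumes this variable elsewhere in this change:
  export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000
  # bin/mapred should then pass -Xmx1000m (via JAVA_HEAP_MAX) when starting the JobHistoryServer,
  # unless an explicit -Xmx is already present in HADOOP_OPTS or HADOOP_JOB_HISTORYSERVER_OPTS.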
# Specify the JVM options to be used when starting the Job History Server.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#export HADOOP_JOB_HISTORYSERVER_OPTS= #export HADOOP_JOB_HISTORYSERVER_OPTS=
#export HADOOP_MAPRED_LOG_DIR="" # Where log files are stored. $HADOOP_MAPRED_HOME/logs by default.
#export HADOOP_JHS_LOGGER=INFO,RFA # Hadoop JobSummary logger. # Specify the log4j settings for the JobHistoryServer
#export HADOOP_MAPRED_PID_DIR= # The pid files are stored. /tmp by default. #export HADOOP_JHS_LOGGER=INFO,RFA
#export HADOOP_MAPRED_IDENT_STRING= #A string representing this instance of hadoop. $USER by default
#export HADOOP_MAPRED_NICENESS= #The scheduling priority for daemons. Defaults to 0.

View File

@ -81,15 +81,14 @@ protected String getSelectQuery() {
try { try {
DBInputFormat.DBInputSplit split = getSplit(); DBInputFormat.DBInputSplit split = getSplit();
if (split.getLength() > 0 && split.getStart() > 0){ if (split.getLength() > 0){
String querystring = query.toString(); String querystring = query.toString();
query = new StringBuilder(); query = new StringBuilder();
query.append("SELECT * FROM (SELECT a.*,ROWNUM dbif_rno FROM ( "); query.append("SELECT * FROM (SELECT a.*,ROWNUM dbif_rno FROM ( ");
query.append(querystring); query.append(querystring);
query.append(" ) a WHERE rownum <= ").append(split.getStart()); query.append(" ) a WHERE rownum <= ").append(split.getEnd());
query.append(" + ").append(split.getLength()); query.append(" ) WHERE dbif_rno > ").append(split.getStart());
query.append(" ) WHERE dbif_rno >= ").append(split.getStart());
} }
} catch (IOException ex) { } catch (IOException ex) {
// ignore, will not throw. // ignore, will not throw.

View File

@ -110,7 +110,7 @@ public void testOracleDBRecordReader() throws Exception {
splitter, NullDBWritable.class, configuration, connect, splitter, NullDBWritable.class, configuration, connect,
dbConfiguration, "condition", fields, "table"); dbConfiguration, "condition", fields, "table");
assertEquals( assertEquals(
"SELECT * FROM (SELECT a.*,ROWNUM dbif_rno FROM ( SELECT f1, f2 FROM table WHERE condition ORDER BY Order ) a WHERE rownum <= 1 + 9 ) WHERE dbif_rno >= 1", "SELECT * FROM (SELECT a.*,ROWNUM dbif_rno FROM ( SELECT f1, f2 FROM table WHERE condition ORDER BY Order ) a WHERE rownum <= 10 ) WHERE dbif_rno > 1",
recorder.getSelectQuery()); recorder.getSelectQuery());
} }

View File

@ -36,8 +36,8 @@
import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock; import java.util.concurrent.locks.ReentrantLock;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceAudience.LimitedPrivate; import org.apache.hadoop.classification.InterfaceAudience.LimitedPrivate;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
@ -61,6 +61,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
@ -933,4 +934,10 @@ public synchronized List<Container> getTransferredContainers(
return new HashMap<ApplicationId, return new HashMap<ApplicationId,
SchedulerApplication<SchedulerApplicationAttempt>>(); SchedulerApplication<SchedulerApplicationAttempt>>();
} }
@Override
protected void completedContainer(RMContainer rmContainer,
ContainerStatus containerStatus, RMContainerEventType event) {
// do nothing
}
} }

View File

@ -211,6 +211,12 @@ Release 2.6.0 - UNRELEASED
YARN-2397. Avoided loading two authentication filters for RM and TS web YARN-2397. Avoided loading two authentication filters for RM and TS web
interfaces. (Varun Vasudev via zjshen) interfaces. (Varun Vasudev via zjshen)
YARN-2409. RM ActiveToStandBy transition missing stopping previous rmDispatcher.
(Rohith via jianhe)
YARN-2249. Avoided AM release requests being lost on work preserving RM
restart. (Jian He via zjshen)
Release 2.5.0 - UNRELEASED Release 2.5.0 - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -1,70 +0,0 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Run a shell command on all slave hosts.
#
# Environment Variables
#
# YARN_SLAVES File naming remote hosts.
# Default is ${YARN_CONF_DIR}/slaves.
# YARN_CONF_DIR Alternate conf dir. Default is ${HADOOP_YARN_HOME}/conf.
# YARN_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
# YARN_SSH_OPTS Options passed to ssh when running remote commands.
##
usage="Usage: slaves.sh [--config confdir] command..."
# if no args specified, show usage
if [ $# -le 0 ]; then
echo $usage
exit 1
fi
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/yarn-config.sh
# If the slaves file is specified in the command line,
# then it takes precedence over the definition in
# yarn-env.sh. Save it here.
HOSTLIST=$YARN_SLAVES
if [ -f "${YARN_CONF_DIR}/yarn-env.sh" ]; then
. "${YARN_CONF_DIR}/yarn-env.sh"
fi
if [ "$HOSTLIST" = "" ]; then
if [ "$YARN_SLAVES" = "" ]; then
export HOSTLIST="${YARN_CONF_DIR}/slaves"
else
export HOSTLIST="${YARN_SLAVES}"
fi
fi
for slave in `cat "$HOSTLIST"|sed "s/#.*$//;/^$/d"`; do
ssh $YARN_SSH_OPTS $slave $"${@// /\\ }" \
2>&1 | sed "s/^/$slave: /" &
if [ "$YARN_SLAVE_SLEEP" != "" ]; then
sleep $YARN_SLAVE_SLEEP
fi
done
wait

View File

@ -16,20 +16,34 @@
# limitations under the License. # limitations under the License.
# Start all yarn daemons. Run this on master node. function hadoop_usage
{
echo "Usage: start-yarn.sh [--config confdir]"
}
echo "starting yarn daemons" this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
bin=`dirname "${BASH_SOURCE-$0}"` # let's locate libexec...
bin=`cd "$bin"; pwd` if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
DEFAULT_LIBEXEC_DIR="$bin"/../libexec HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} # shellcheck disable=SC2034
. $HADOOP_LIBEXEC_DIR/yarn-config.sh HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
exit 1
fi
# start resourceManager # start resourceManager
"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR start resourcemanager "${bin}/yarn-daemon.sh" --config "${YARN_CONF_DIR}" start resourcemanager
# start nodeManager # start nodeManager
"$bin"/yarn-daemons.sh --config $YARN_CONF_DIR start nodemanager "${bin}/yarn-daemons.sh" --config "${YARN_CONF_DIR}" start nodemanager
# start proxyserver # start proxyserver
#"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR start proxyserver #"${bin}/yarn-daemon.sh" --config "${YARN_CONF_DIR}" start proxyserver

View File

@ -18,18 +18,34 @@
# Stop all yarn daemons. Run this on master node. # Stop all yarn daemons. Run this on master node.
echo "stopping yarn daemons" function hadoop_usage
{
echo "Usage: stop-yarn.sh [--config confdir]"
}
bin=`dirname "${BASH_SOURCE-$0}"` this="${BASH_SOURCE-$0}"
bin=`cd "$bin"; pwd` bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
DEFAULT_LIBEXEC_DIR="$bin"/../libexec # let's locate libexec...
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} if [[ -n "${HADOOP_PREFIX}" ]]; then
. $HADOOP_LIBEXEC_DIR/yarn-config.sh DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
# stop resourceManager HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR stop resourcemanager # shellcheck disable=SC2034
# stop nodeManager HADOOP_NEW_CONFIG=true
"$bin"/yarn-daemons.sh --config $YARN_CONF_DIR stop nodemanager if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
# stop proxy server . "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR stop proxyserver else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
exit 1
fi
# stop resourceManager
"${bin}/yarn-daemon.sh" --config "${YARN_CONF_DIR}" stop resourcemanager
# stop nodeManager
"${bin}/yarn-daemons.sh" --config "${YARN_CONF_DIR}" stop nodemanager
# stop proxyserver
#"${bin}/yarn-daemon.sh" --config "${YARN_CONF_DIR}" stop proxyserver

View File

@ -15,266 +15,182 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
function hadoop_usage
# The Hadoop command script {
# echo "Usage: yarn [--config confdir] [--daemon (start|stop|status)] COMMAND"
# Environment Variables
#
# JAVA_HOME The java implementation to use. Overrides JAVA_HOME.
#
# YARN_USER_CLASSPATH Additional user CLASSPATH entries.
#
# YARN_USER_CLASSPATH_FIRST If set to non empty value then the user classpath
# specified in YARN_USER_CLASSPATH will be
# appended at the beginning of YARN's final
# classpath instead of at the end.
#
# YARN_HEAPSIZE The maximum amount of heap to use, in MB.
# Default is 1000.
#
# YARN_{COMMAND}_HEAPSIZE overrides YARN_HEAPSIZE for a given command
# eg YARN_NODEMANAGER_HEAPSIZE sets the heap
# size for the NodeManager. If you set the
# heap size in YARN_{COMMAND}_OPTS or YARN_OPTS
# they take precedence.
#
# YARN_OPTS Extra Java runtime options.
#
# YARN_CLIENT_OPTS when the respective command is run.
# YARN_{COMMAND}_OPTS etc YARN_NODEMANAGER_OPTS applies to NodeManager
# for e.g. YARN_CLIENT_OPTS applies to
# more than one command (fs, dfs, fsck,
# dfsadmin etc)
#
# YARN_CONF_DIR Alternate conf dir. Default is ${HADOOP_YARN_HOME}/conf.
#
# YARN_ROOT_LOGGER The root appender. Default is INFO,console
#
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin" > /dev/null; pwd`
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/yarn-config.sh
function print_usage(){
echo "Usage: yarn [--config confdir] COMMAND"
echo "where COMMAND is one of:" echo "where COMMAND is one of:"
echo " resourcemanager -format-state-store deletes the RMStateStore" echo " application prints application(s) report/kill application"
echo " resourcemanager run the ResourceManager" echo " applicationattempt prints applicationattempt(s) report"
echo " nodemanager run a nodemanager on each slave" echo " classpath prints the class path needed to get the"
echo " timelineserver run the timeline server" echo " Hadoop jar and the required libraries"
echo " rmadmin admin tools"
echo " version print the version"
echo " jar <jar> run a jar file"
echo " application prints application(s)"
echo " report/kill application"
echo " applicationattempt prints applicationattempt(s)"
echo " report"
echo " container prints container(s) report" echo " container prints container(s) report"
echo " node prints node report(s)" echo " daemonlog get/set the log level for each daemon"
echo " jar <jar> run a jar file"
echo " logs dump container logs" echo " logs dump container logs"
echo " classpath prints the class path needed to" echo " node prints node report(s)"
echo " get the Hadoop jar and the" echo " nodemanager run a nodemanager on each slave"
echo " required libraries" echo " proxyserver run the web app proxy server"
echo " daemonlog get/set the log level for each" echo " resourcemanager run the ResourceManager"
echo " daemon" echo " resourcemanager -format-state-store deletes the RMStateStore"
echo " rmadmin admin tools"
echo " timelineserver run the timeline server"
echo " version print the version"
echo " or" echo " or"
echo " CLASSNAME run the class named CLASSNAME" echo " CLASSNAME run the class named CLASSNAME"
echo "Most commands print help when invoked w/o parameters." echo "Most commands print help when invoked w/o parameters."
} }
# if no args specified, show usage
if [ $# = 0 ]; then # let's locate libexec...
print_usage if [[ -n "${HADOOP_PREFIX}" ]]; then
DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
this="${BASH_SOURCE-$0}"
bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
exit 1 exit 1
fi fi
# if no args specified, show usage
if [[ $# = 0 ]]; then
hadoop_exit_with_usage 1
fi
# get arguments # get arguments
COMMAND=$1 COMMAND=$1
shift shift
case $COMMAND in case "${COMMAND}" in
# usage flags application|applicationattempt|container)
--help|-help|-h) CLASS=org.apache.hadoop.yarn.client.cli.ApplicationCLI
print_usage YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
;;
classpath)
hadoop_finalize
echo "${CLASSPATH}"
exit exit
;; ;;
daemonlog)
CLASS=org.apache.hadoop.log.LogLevel
YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
;;
jar)
CLASS=org.apache.hadoop.util.RunJar
YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
;;
historyserver)
daemon="true"
echo "DEPRECATED: Use of this command to start the timeline server is deprecated." 1>&2
echo "Instead use the timelineserver command for it." 1>&2
echo "Starting the History Server anyway..." 1>&2
CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
;;
logs)
CLASS=org.apache.hadoop.yarn.client.cli.LogsCLI
YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
;;
node)
CLASS=org.apache.hadoop.yarn.client.cli.NodeCLI
YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
;;
nodemanager)
daemon="true"
CLASS='org.apache.hadoop.yarn.server.nodemanager.NodeManager'
YARN_OPTS="${YARN_OPTS} ${YARN_NODEMANAGER_OPTS}"
if [[ -n "${YARN_NODEMANAGER_HEAPSIZE}" ]]; then
JAVA_HEAP_MAX="-Xmx${YARN_NODEMANAGER_HEAPSIZE}m"
fi
;;
proxyserver)
daemon="true"
CLASS='org.apache.hadoop.yarn.server.webproxy.WebAppProxyServer'
YARN_OPTS="${YARN_OPTS} ${YARN_PROXYSERVER_OPTS}"
if [[ -n "${YARN_PROXYSERVER_HEAPSIZE}" ]]; then
JAVA_HEAP_MAX="-Xmx${YARN_PROXYSERVER_HEAPSIZE}m"
fi
;;
resourcemanager)
daemon="true"
CLASS='org.apache.hadoop.yarn.server.resourcemanager.ResourceManager'
YARN_OPTS="${YARN_OPTS} ${YARN_RESOURCEMANAGER_OPTS}"
if [[ -n "${YARN_RESOURCEMANAGER_HEAPSIZE}" ]]; then
JAVA_HEAP_MAX="-Xmx${YARN_RESOURCEMANAGER_HEAPSIZE}m"
fi
;;
rmadmin)
CLASS='org.apache.hadoop.yarn.client.cli.RMAdminCLI'
YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
;;
timelineserver)
daemon="true"
CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
YARN_OPTS="${YARN_OPTS} ${YARN_TIMELINESERVER_OPTS}"
if [[ -n "${YARN_TIMELINESERVER_HEAPSIZE}" ]]; then
JAVA_HEAP_MAX="-Xmx${YARN_TIMELINESERVER_HEAPSIZE}m"
fi
;;
version)
CLASS=org.apache.hadoop.util.VersionInfo
YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
;;
-*)
hadoop_exit_with_usage 1
;;
*)
CLASS="${COMMAND}"
;;
esac esac
if [ -f "${YARN_CONF_DIR}/yarn-env.sh" ]; then # set HADOOP_OPTS to YARN_OPTS so that we can use
. "${YARN_CONF_DIR}/yarn-env.sh" # finalize, etc, without doing anything funky
HADOOP_OPTS="${YARN_OPTS}"
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out"
daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}.pid"
if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then
# shellcheck disable=SC2034
HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
YARN_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log"
fi fi
# some Java parameters hadoop_add_param HADOOP_OPTS Xmx "${JAVA_HEAP_MAX}"
if [ "$JAVA_HOME" != "" ]; then
#echo "run java in $JAVA_HOME"
JAVA_HOME=$JAVA_HOME
fi
if [ "$JAVA_HOME" = "" ]; then
echo "Error: JAVA_HOME is not set."
exit 1
fi
JAVA=$JAVA_HOME/bin/java # Add YARN custom options to command line in case someone actually
JAVA_HEAP_MAX=-Xmx1000m # used these.
#
# Note that we are replacing ' ' with '\ ' so that when we exec
# stuff it works
#
hadoop_add_param HADOOP_OPTS yarn.log.dir "-Dyarn.log.dir=${HADOOP_LOG_DIR/ /\ }"
hadoop_add_param HADOOP_OPTS yarn.log.file "-Dyarn.log.file=${HADOOP_LOGFILE/ /\ }"
hadoop_add_param HADOOP_OPTS yarn.home.dir "-Dyarn.home.dir=${HADOOP_YARN_HOME/ /\ }"
hadoop_add_param HADOOP_OPTS yarn.root.logger "-Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
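An illustrative sketch (not part of the patch), assuming hadoop_add_param only appends the given -D option when the named key is not already present in the target variable, which is what the usage above relies on:
  HADOOP_OPTS="-Dyarn.log.dir=/tmp/yarn-logs"               # hypothetical pre-set value
  hadoop_add_param HADOOP_OPTS yarn.log.dir "-Dyarn.log.dir=${HADOOP_LOG_DIR}"
  # under that assumption, the pre-set yarn.log.dir survives and no duplicate -D flag is added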
# check envvars which might override default args hadoop_finalize
if [ "$YARN_HEAPSIZE" != "" ]; then
#echo "run with heapsize $YARN_HEAPSIZE"
JAVA_HEAP_MAX="-Xmx""$YARN_HEAPSIZE""m"
#echo $JAVA_HEAP_MAX
fi
# CLASSPATH initially contains $HADOOP_CONF_DIR & $YARN_CONF_DIR export CLASSPATH
if [ ! -d "$HADOOP_CONF_DIR" ]; then
echo No HADOOP_CONF_DIR set.
echo Please specify it either in yarn-env.sh or in the environment.
exit 1
fi
CLASSPATH="${HADOOP_CONF_DIR}:${YARN_CONF_DIR}:${CLASSPATH}" if [[ -n "${daemon}" ]]; then
if [[ -n "${secure_service}" ]]; then
# for developers, add Hadoop classes to CLASSPATH hadoop_secure_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" \
if [ -d "$HADOOP_YARN_HOME/yarn-api/target/classes" ]; then "${CLASS}" "${daemon_pidfile}" "${daemon_outfile}" \
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-api/target/classes "${priv_pidfile}" "${priv_outfile}" "${priv_errfile}" "$@"
fi
if [ -d "$HADOOP_YARN_HOME/yarn-common/target/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-common/target/classes
fi
if [ -d "$HADOOP_YARN_HOME/yarn-mapreduce/target/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-mapreduce/target/classes
fi
if [ -d "$HADOOP_YARN_HOME/yarn-master-worker/target/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-master-worker/target/classes
fi
if [ -d "$HADOOP_YARN_HOME/yarn-server/yarn-server-nodemanager/target/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-server/yarn-server-nodemanager/target/classes
fi
if [ -d "$HADOOP_YARN_HOME/yarn-server/yarn-server-common/target/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-server/yarn-server-common/target/classes
fi
if [ -d "$HADOOP_YARN_HOME/yarn-server/yarn-server-resourcemanager/target/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-server/yarn-server-resourcemanager/target/classes
fi
if [ -d "$HADOOP_YARN_HOME/yarn-server/yarn-server-applicationhistoryservice/target/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-server/yarn-server-applicationhistoryservice/target/classes
fi
if [ -d "$HADOOP_YARN_HOME/build/test/classes" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/target/test/classes
fi
if [ -d "$HADOOP_YARN_HOME/build/tools" ]; then
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/build/tools
fi
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/${YARN_DIR}/*
CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/${YARN_LIB_JARS_DIR}/*
# Add user defined YARN_USER_CLASSPATH to the class path (if defined)
if [ -n "$YARN_USER_CLASSPATH" ]; then
if [ -n "$YARN_USER_CLASSPATH_FIRST" ]; then
# User requested to add the custom entries at the beginning
CLASSPATH=${YARN_USER_CLASSPATH}:${CLASSPATH}
else else
# By default we will just append the extra entries at the end hadoop_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}" \
CLASSPATH=${CLASSPATH}:${YARN_USER_CLASSPATH} "${daemon_pidfile}" "${daemon_outfile}" "$@"
fi fi
fi exit $?
# so that filenames w/ spaces are handled correctly in loops below
IFS=
# default log directory & file
if [ "$YARN_LOG_DIR" = "" ]; then
YARN_LOG_DIR="$HADOOP_YARN_HOME/logs"
fi
if [ "$YARN_LOGFILE" = "" ]; then
YARN_LOGFILE='yarn.log'
fi
# restore ordinary behaviour
unset IFS
# figure out which class to run
if [ "$COMMAND" = "classpath" ] ; then
echo $CLASSPATH
exit
elif [ "$COMMAND" = "rmadmin" ] ; then
CLASS='org.apache.hadoop.yarn.client.cli.RMAdminCLI'
YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
elif [ "$COMMAND" = "application" ] ||
[ "$COMMAND" = "applicationattempt" ] ||
[ "$COMMAND" = "container" ]; then
CLASS=org.apache.hadoop.yarn.client.cli.ApplicationCLI
YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
set -- $COMMAND $@
elif [ "$COMMAND" = "node" ] ; then
CLASS=org.apache.hadoop.yarn.client.cli.NodeCLI
YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
elif [ "$COMMAND" = "resourcemanager" ] ; then
CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/rm-config/log4j.properties
CLASS='org.apache.hadoop.yarn.server.resourcemanager.ResourceManager'
YARN_OPTS="$YARN_OPTS $YARN_RESOURCEMANAGER_OPTS"
if [ "$YARN_RESOURCEMANAGER_HEAPSIZE" != "" ]; then
JAVA_HEAP_MAX="-Xmx""$YARN_RESOURCEMANAGER_HEAPSIZE""m"
fi
elif [ "$COMMAND" = "historyserver" ] ; then
echo "DEPRECATED: Use of this command to start the timeline server is deprecated." 1>&2
echo "Instead use the timelineserver command for it." 1>&2
CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/ahs-config/log4j.properties
CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
YARN_OPTS="$YARN_OPTS $YARN_HISTORYSERVER_OPTS"
if [ "$YARN_HISTORYSERVER_HEAPSIZE" != "" ]; then
JAVA_HEAP_MAX="-Xmx""$YARN_HISTORYSERVER_HEAPSIZE""m"
fi
elif [ "$COMMAND" = "timelineserver" ] ; then
CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/timelineserver-config/log4j.properties
CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
YARN_OPTS="$YARN_OPTS $YARN_TIMELINESERVER_OPTS"
if [ "$YARN_TIMELINESERVER_HEAPSIZE" != "" ]; then
JAVA_HEAP_MAX="-Xmx""$YARN_TIMELINESERVER_HEAPSIZE""m"
fi
elif [ "$COMMAND" = "nodemanager" ] ; then
CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/nm-config/log4j.properties
CLASS='org.apache.hadoop.yarn.server.nodemanager.NodeManager'
YARN_OPTS="$YARN_OPTS -server $YARN_NODEMANAGER_OPTS"
if [ "$YARN_NODEMANAGER_HEAPSIZE" != "" ]; then
JAVA_HEAP_MAX="-Xmx""$YARN_NODEMANAGER_HEAPSIZE""m"
fi
elif [ "$COMMAND" = "proxyserver" ] ; then
CLASS='org.apache.hadoop.yarn.server.webproxy.WebAppProxyServer'
YARN_OPTS="$YARN_OPTS $YARN_PROXYSERVER_OPTS"
if [ "$YARN_PROXYSERVER_HEAPSIZE" != "" ]; then
JAVA_HEAP_MAX="-Xmx""$YARN_PROXYSERVER_HEAPSIZE""m"
fi
elif [ "$COMMAND" = "version" ] ; then
CLASS=org.apache.hadoop.util.VersionInfo
YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
elif [ "$COMMAND" = "jar" ] ; then
CLASS=org.apache.hadoop.util.RunJar
YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
elif [ "$COMMAND" = "logs" ] ; then
CLASS=org.apache.hadoop.yarn.client.cli.LogsCLI
YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
elif [ "$COMMAND" = "daemonlog" ] ; then
CLASS=org.apache.hadoop.log.LogLevel
YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
else else
CLASS=$COMMAND hadoop_java_exec "${COMMAND}" "${CLASS}" "$@"
fi fi
YARN_OPTS="$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR"
YARN_OPTS="$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR"
YARN_OPTS="$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE"
YARN_OPTS="$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE"
YARN_OPTS="$YARN_OPTS -Dyarn.home.dir=$HADOOP_YARN_HOME"
YARN_OPTS="$YARN_OPTS -Dhadoop.home.dir=$HADOOP_YARN_HOME"
YARN_OPTS="$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
YARN_OPTS="$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
YARN_OPTS="$YARN_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
fi
exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $YARN_OPTS -classpath "$CLASSPATH" $CLASS "$@"

View File

@ -13,53 +13,81 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# included in all the hadoop scripts with source command function hadoop_subproject_init
# should not be executable directly {
bin=`which "$0"`
bin=`dirname "${bin}"` # at some point in time, someone thought it would be a good idea to
bin=`cd "$bin"; pwd` # create separate vars for every subproject. *sigh*
# let's perform some overrides and setup some defaults for bw compat
# this way the common hadoop var's == subproject vars and can be
# used interchangeable from here on out
# ...
# this should get deprecated at some point.
if [[ -e "${YARN_CONF_DIR}/yarn-env.sh" ]]; then
. "${YARN_CONF_DIR}/yarn-env.sh"
elif [[ -e "${HADOOP_CONF_DIR}/yarn-env.sh" ]]; then
. "${HADOOP_CONF_DIR}/yarn-env.sh"
fi
if [[ -n "${YARN_CONF_DIR}" ]]; then
HADOOP_CONF_DIR="${YARN_CONF_DIR}"
fi
YARN_CONF_DIR="${HADOOP_CONF_DIR}"
# YARN_CONF_DIR needs precedence over HADOOP_CONF_DIR
# and the various jar dirs
hadoop_add_classpath "${YARN_CONF_DIR}" before
HADOOP_LOG_DIR="${YARN_LOG_DIR:-$HADOOP_LOG_DIR}"
YARN_LOG_DIR="${HADOOP_LOG_DIR}"
HADOOP_LOGFILE="${YARN_LOGFILE:-$HADOOP_LOGFILE}"
YARN_LOGFILE="${HADOOP_LOGFILE}"
HADOOP_NICENESS="${YARN_NICENESS:-$HADOOP_NICENESS}"
YARN_NICENESS="${HADOOP_NICENESS}"
HADOOP_STOP_TIMEOUT="${YARN_STOP_TIMEOUT:-$HADOOP_STOP_TIMEOUT}"
YARN_STOP_TIMEOUT="${HADOOP_STOP_TIMEOUT}"
HADOOP_PID_DIR="${YARN_PID_DIR:-$HADOOP_PID_DIR}"
YARN_PID_DIR="${HADOOP_PID_DIR}"
HADOOP_ROOT_LOGGER="${YARN_ROOT_LOGGER:-INFO,console}"
YARN_ROOT_LOGGER="${HADOOP_ROOT_LOGGER}"
HADOOP_YARN_HOME="${HADOOP_YARN_HOME:-$HADOOP_PREFIX}"
HADOOP_IDENT_STRING="${YARN_IDENT_STRING:-$HADOOP_IDENT_STRING}"
YARN_IDENT_STRING="${HADOOP_IDENT_STRING}"
YARN_OPTS="${YARN_OPTS:-$HADOOP_OPTS}"
# YARN-1429 added the completely superfluous YARN_USER_CLASSPATH
# env var. We're going to override HADOOP_USER_CLASSPATH to keep
# consistency with the rest of the duplicate/useless env vars
HADOOP_USER_CLASSPATH="${YARN_USER_CLASSPATH:-$HADOOP_USER_CLASSPATH}"
YARN_USER_CLASSPATH="${HADOOP_USER_CLASSPATH}"
HADOOP_USER_CLASSPATH_FIRST="${YARN_USER_CLASSPATH_FIRST:-$HADOOP_USER_CLASSPATH_FIRST}"
YARN_USER_CLASSPATH_FIRST="${HADOOP_USER_CLASSPATH_FIRST}"
}
DEFAULT_LIBEXEC_DIR="$bin"/../libexec if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} _yc_this="${BASH_SOURCE-$0}"
if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then HADOOP_LIBEXEC_DIR=$(cd -P -- "$(dirname -- "${_yc_this}")" >/dev/null && pwd -P)
. ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh fi
elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then
. "$HADOOP_COMMON_HOME"/libexec/hadoop-config.sh if [[ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
elif [ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]; then . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
. "$HADOOP_HOME"/libexec/hadoop-config.sh elif [[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]]; then
. "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh"
elif [[ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]]; then
. "${HADOOP_HOME}/libexec/hadoop-config.sh"
else else
echo "Hadoop common not found." echo "Hadoop common not found."
exit exit
fi fi
# Same glibc bug that discovered in Hadoop.
# Without this you can see very large vmem settings on containers.
export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-4}
#check to see if the conf dir is given as an optional argument
if [ $# -gt 1 ]
then
if [ "--config" = "$1" ]
then
shift
confdir=$1
shift
YARN_CONF_DIR=$confdir
fi
fi
# Allow alternate conf dir location.
export YARN_CONF_DIR="${HADOOP_CONF_DIR:-$HADOOP_YARN_HOME/conf}"
#check to see it is specified whether to use the slaves or the
# masters file
if [ $# -gt 1 ]
then
if [ "--hosts" = "$1" ]
then
shift
slavesfile=$1
shift
export YARN_SLAVES="${YARN_CONF_DIR}/$slavesfile"
fi
fi

View File

@ -15,147 +15,32 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
function hadoop_usage
{
echo "Usage: yarn-daemon.sh [--config confdir] (start|stop|status) <hadoop-command> <args...>"
}
# Runs a yarn command as a daemon. # let's locate libexec...
# if [[ -n "${HADOOP_PREFIX}" ]]; then
# Environment Variables DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
# else
# YARN_CONF_DIR Alternate conf dir. Default is ${HADOOP_YARN_HOME}/conf. this="${BASH_SOURCE-$0}"
# YARN_LOG_DIR Where log files are stored. PWD by default. bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
# YARN_MASTER host:path where hadoop code should be rsync'd from DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
# YARN_PID_DIR The pid files are stored. /tmp by default. fi
# YARN_IDENT_STRING A string representing this instance of hadoop. $USER by default
# YARN_NICENESS The scheduling priority for daemons. Defaults to 0.
##
usage="Usage: yarn-daemon.sh [--config <conf-dir>] [--hosts hostlistfile] (start|stop) <yarn-command> " HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
# if no args specified, show usage HADOOP_NEW_CONFIG=true
if [ $# -le 1 ]; then if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
echo $usage . "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
exit 1 exit 1
fi fi
bin=`dirname "${BASH_SOURCE-$0}"` daemonmode=$1
bin=`cd "$bin"; pwd`
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/yarn-config.sh
# get arguments
startStop=$1
shift
command=$1
shift shift
hadoop_rotate_log () exec "${HADOOP_YARN_HOME}/bin/yarn" \
{ --config "${HADOOP_CONF_DIR}" --daemon "${daemonmode}" "$@"
log=$1;
num=5;
if [ -n "$2" ]; then
num=$2
fi
if [ -f "$log" ]; then # rotate logs
while [ $num -gt 1 ]; do
prev=`expr $num - 1`
[ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
num=$prev
done
mv "$log" "$log.$num";
fi
}
if [ -f "${YARN_CONF_DIR}/yarn-env.sh" ]; then
. "${YARN_CONF_DIR}/yarn-env.sh"
fi
if [ "$YARN_IDENT_STRING" = "" ]; then
export YARN_IDENT_STRING="$USER"
fi
# get log directory
if [ "$YARN_LOG_DIR" = "" ]; then
export YARN_LOG_DIR="$HADOOP_YARN_HOME/logs"
fi
if [ ! -w "$YARN_LOG_DIR" ] ; then
mkdir -p "$YARN_LOG_DIR"
chown $YARN_IDENT_STRING $YARN_LOG_DIR
fi
if [ "$YARN_PID_DIR" = "" ]; then
YARN_PID_DIR=/tmp
fi
# some variables
export YARN_LOGFILE=yarn-$YARN_IDENT_STRING-$command-$HOSTNAME.log
export YARN_ROOT_LOGGER=${YARN_ROOT_LOGGER:-INFO,RFA}
log=$YARN_LOG_DIR/yarn-$YARN_IDENT_STRING-$command-$HOSTNAME.out
pid=$YARN_PID_DIR/yarn-$YARN_IDENT_STRING-$command.pid
YARN_STOP_TIMEOUT=${YARN_STOP_TIMEOUT:-5}
# Set default scheduling priority
if [ "$YARN_NICENESS" = "" ]; then
export YARN_NICENESS=0
fi
case $startStop in
(start)
[ -w "$YARN_PID_DIR" ] || mkdir -p "$YARN_PID_DIR"
if [ -f $pid ]; then
if kill -0 `cat $pid` > /dev/null 2>&1; then
echo $command running as process `cat $pid`. Stop it first.
exit 1
fi
fi
if [ "$YARN_MASTER" != "" ]; then
echo rsync from $YARN_MASTER
rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $YARN_MASTER/ "$HADOOP_YARN_HOME"
fi
hadoop_rotate_log $log
echo starting $command, logging to $log
cd "$HADOOP_YARN_HOME"
nohup nice -n $YARN_NICENESS "$HADOOP_YARN_HOME"/bin/yarn --config $YARN_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
echo $! > $pid
sleep 1
head "$log"
# capture the ulimit output
echo "ulimit -a" >> $log
ulimit -a >> $log 2>&1
;;
(stop)
if [ -f $pid ]; then
TARGET_PID=`cat $pid`
if kill -0 $TARGET_PID > /dev/null 2>&1; then
echo stopping $command
kill $TARGET_PID
sleep $YARN_STOP_TIMEOUT
if kill -0 $TARGET_PID > /dev/null 2>&1; then
echo "$command did not stop gracefully after $YARN_STOP_TIMEOUT seconds: killing with kill -9"
kill -9 $TARGET_PID
fi
else
echo no $command to stop
fi
rm -f $pid
else
echo no $command to stop
fi
;;
(*)
echo $usage
exit 1
;;
esac

View File

@ -16,23 +16,31 @@
# limitations under the License. # limitations under the License.
# Run a Yarn command on all slave hosts. function hadoop_usage
{
echo "Usage: yarn-daemons.sh [--config confdir] [--hosts hostlistfile] (start|stop|status) <yarn-command> <args...>"
}
usage="Usage: yarn-daemons.sh [--config confdir] [--hosts hostlistfile] [start this="${BASH_SOURCE-$0}"
|stop] command args..." bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
# if no args specified, show usage # let's locate libexec...
if [ $# -le 1 ]; then if [[ -n "${HADOOP_PREFIX}" ]]; then
echo $usage DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
else
DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
fi
HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
# shellcheck disable=SC2034
HADOOP_NEW_CONFIG=true
if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
. "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
else
echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
exit 1 exit 1
fi fi
bin=`dirname "${BASH_SOURCE-$0}"` hadoop_connect_to_hosts "${bin}/yarn-daemon.sh" \
bin=`cd "$bin"; pwd` --config "${HADOOP_CONF_DIR}" "$@"
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/yarn-config.sh
exec "$bin/slaves.sh" --config $YARN_CONF_DIR cd "$HADOOP_YARN_HOME" \; "$bin/yarn-daemon.sh" --config $YARN_CONF_DIR "$@"

View File

@ -13,118 +13,115 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
##
## THIS FILE ACTS AS AN OVERRIDE FOR hadoop-env.sh FOR ALL
## WORK DONE BY THE yarn AND RELATED COMMANDS.
##
## Precedence rules:
##
## yarn-env.sh > hadoop-env.sh > hard-coded defaults
##
## YARN_xyz > HADOOP_xyz > hard-coded defaults
##
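A brief illustrative sketch (not part of the patch) of the YARN_xyz > HADOOP_xyz precedence, relying on the mapping done in yarn-config.sh in this same change; the values are hypothetical.
  # in yarn-env.sh: YARN-prefixed values win, but only for yarn commands
  export YARN_PID_DIR=/var/run/yarn          # hypothetical path; overrides HADOOP_PID_DIR
  export YARN_ROOT_LOGGER=INFO,RFA           # overrides HADOOP_ROOT_LOGGER for yarn commands
  # unset YARN_* values fall back to the HADOOP_* settings, then to the hard-coded defaults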
###
# Generic settings for YARN
###
# User for YARN daemons # User for YARN daemons
export HADOOP_YARN_USER=${HADOOP_YARN_USER:-yarn} export HADOOP_YARN_USER=${HADOOP_YARN_USER:-yarn}
# resolve links - $0 may be a softlink #
export YARN_CONF_DIR="${YARN_CONF_DIR:-$HADOOP_YARN_HOME/conf}" # By default, YARN will use HADOOP_CONF_DIR. Specify a custom
# YARN_CONF_DIR here
# export YARN_CONF_DIR="${YARN_CONF_DIR:-$HADOOP_YARN_HOME/conf}"
#
# some Java parameters # Override Hadoop's log directory & file
# export JAVA_HOME=/home/y/libexec/jdk1.6.0/ # export YARN_LOG_DIR="$HADOOP_YARN_HOME/logs"
if [ "$JAVA_HOME" != "" ]; then # export YARN_LOGFILE='yarn.log'
#echo "run java in $JAVA_HOME"
JAVA_HOME=$JAVA_HOME
fi
if [ "$JAVA_HOME" = "" ]; then
echo "Error: JAVA_HOME is not set."
exit 1
fi
JAVA=$JAVA_HOME/bin/java # Need a custom-to-YARN service-level authorization policy file?
JAVA_HEAP_MAX=-Xmx1000m # export YARN_POLICYFILE="yarn-policy.xml"
# For setting YARN specific HEAP sizes please use this #Override the log4j settings for all YARN apps
# Parameter and set appropriately # export YARN_ROOT_LOGGER="INFO,console"
# YARN_HEAPSIZE=1000
# check envvars which might override default args
if [ "$YARN_HEAPSIZE" != "" ]; then
JAVA_HEAP_MAX="-Xmx""$YARN_HEAPSIZE""m"
fi
###
# Resource Manager specific parameters # Resource Manager specific parameters
###
# Specify the max Heapsize for the ResourceManager using a numerical value # Specify the max heapsize for the ResourceManager using a numerical value
# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set # in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set
# the value to 1000. # the value to 1000.
# This value will be overridden by an Xmx setting specified in either YARN_OPTS # This value will be overridden by an Xmx setting specified in either YARN_OPTS,
# and/or YARN_RESOURCEMANAGER_OPTS. # HADOOP_OPTS, and/or YARN_RESOURCEMANAGER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX # If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two. # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
#
#export YARN_RESOURCEMANAGER_HEAPSIZE=1000 #export YARN_RESOURCEMANAGER_HEAPSIZE=1000
# Specify the max Heapsize for the timeline server using a numerical value
# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set
# the value to 1000.
# This value will be overridden by an Xmx setting specified in either YARN_OPTS
# and/or YARN_TIMELINESERVER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
#export YARN_TIMELINESERVER_HEAPSIZE=1000
# Specify the JVM options to be used when starting the ResourceManager. # Specify the JVM options to be used when starting the ResourceManager.
# These options will be appended to the options specified as YARN_OPTS # These options will be appended to the options specified as YARN_OPTS
# and therefore may override any similar flags set in YARN_OPTS # and therefore may override any similar flags set in YARN_OPTS
#export YARN_RESOURCEMANAGER_OPTS= #
# Examples for a Sun/Oracle JDK:
# a) override the appsummary log file:
# export YARN_RESOURCEMANAGER_OPTS="-Dyarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log -Dyarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY"
#
# b) Set JMX options
# export YARN_RESOURCEMANAGER_OPTS="-Dcom.sun.management.jmxremote=true -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=1026"
#
# c) Set garbage collection logs from hadoop-env.sh
# export YARN_RESOURCE_MANAGER_OPTS="${HADOOP_GC_SETTINGS} -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"
#
# d) ... or set them directly
# export YARN_RESOURCEMANAGER_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"
#
#
# export YARN_RESOURCEMANAGER_OPTS=
###
# Node Manager specific parameters # Node Manager specific parameters
###
# Specify the max Heapsize for the NodeManager using a numerical value # Specify the max Heapsize for the NodeManager using a numerical value
# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set # in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set
# the value to 1000. # the value to 1000.
# This value will be overridden by an Xmx setting specified in either YARN_OPTS # This value will be overridden by an Xmx setting specified in either YARN_OPTS,
# and/or YARN_NODEMANAGER_OPTS. # HADOOP_OPTS, and/or YARN_NODEMANAGER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX # If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two. # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
#
#export YARN_NODEMANAGER_HEAPSIZE=1000 #export YARN_NODEMANAGER_HEAPSIZE=1000
# Specify the JVM options to be used when starting the NodeManager. # Specify the JVM options to be used when starting the NodeManager.
# These options will be appended to the options specified as YARN_OPTS # These options will be appended to the options specified as YARN_OPTS
# and therefore may override any similar flags set in YARN_OPTS # and therefore may override any similar flags set in YARN_OPTS
#
# See ResourceManager for some examples
#
#export YARN_NODEMANAGER_OPTS= #export YARN_NODEMANAGER_OPTS=
# so that filenames w/ spaces are handled correctly in loops below ###
IFS= # TimeLineServer specifc parameters
###
# Specify the max Heapsize for the timeline server using a numerical value
# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set
# the value to 1000.
# This value will be overridden by an Xmx setting specified in either YARN_OPTS,
# HADOOP_OPTS, and/or YARN_TIMELINESERVER_OPTS.
# If not specified, the default value will be picked from either YARN_HEAPMAX
# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
#
#export YARN_TIMELINESERVER_HEAPSIZE=1000
# default log directory & file # Specify the JVM options to be used when starting the TimeLineServer.
if [ "$YARN_LOG_DIR" = "" ]; then # These options will be appended to the options specified as YARN_OPTS
YARN_LOG_DIR="$HADOOP_YARN_HOME/logs" # and therefore may override any similar flags set in YARN_OPTS
fi #
if [ "$YARN_LOGFILE" = "" ]; then # See ResourceManager for some examples
YARN_LOGFILE='yarn.log' #
fi #export YARN_TIMELINESERVER_OPTS=
# default policy file for service-level authorization
if [ "$YARN_POLICYFILE" = "" ]; then
YARN_POLICYFILE="hadoop-policy.xml"
fi
# restore ordinary behaviour
unset IFS
MAC_OSX=false
case "`uname`" in
Darwin*) MAC_OSX=true;;
esac
if $MAC_OSX; then
YARN_OPTS="$YARN_OPTS -Djava.security.krb5.realm= -Djava.security.krb5.kdc="
fi
YARN_OPTS="$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR"
YARN_OPTS="$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR"
YARN_OPTS="$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE"
YARN_OPTS="$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE"
YARN_OPTS="$YARN_OPTS -Dyarn.home.dir=$YARN_COMMON_HOME"
YARN_OPTS="$YARN_OPTS -Dyarn.id.str=$YARN_IDENT_STRING"
YARN_OPTS="$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
YARN_OPTS="$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
YARN_OPTS="$YARN_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
fi
YARN_OPTS="$YARN_OPTS -Dyarn.policy.file=$YARN_POLICYFILE"

View File

@ -1161,6 +1161,9 @@ private void resetDispatcher() {
     ((Service)dispatcher).init(this.conf);
     ((Service)dispatcher).start();
     removeService((Service)rmDispatcher);
+    // Need to stop previous rmDispatcher before assigning new dispatcher
+    // otherwise causes "AsyncDispatcher event handler" thread leak
+    ((Service) rmDispatcher).stop();
     rmDispatcher = dispatcher;
     addIfService(rmDispatcher);
     rmContext.setDispatcher(rmDispatcher);
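The three added lines above are the whole fix for the "AsyncDispatcher event handler" thread leak. A minimal, hedged sketch of the pattern follows (invented class names, not the Hadoop Service or Dispatcher API): each dispatcher owns a long-lived handler thread, so swapping the field without stopping the old instance strands one thread per reset.

import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

public class DispatcherSwapSketch {

  // Stand-in for AsyncDispatcher: one long-lived "event handler" thread.
  static class SketchDispatcher {
    private final BlockingQueue<Runnable> events = new LinkedBlockingQueue<>();
    private final Thread handler = new Thread(() -> {
      try {
        while (true) {
          events.take().run();
        }
      } catch (InterruptedException e) {
        // interrupted by stop(); let the thread exit
      }
    }, "AsyncDispatcher event handler");

    void start() { handler.start(); }
    void stop()  { handler.interrupt(); }
  }

  private SketchDispatcher dispatcher = new SketchDispatcher();

  void resetDispatcher() {
    SketchDispatcher fresh = new SketchDispatcher();
    fresh.start();
    // Stop the previous dispatcher before dropping the reference;
    // without this, every reset leaks one handler thread.
    dispatcher.stop();
    dispatcher = fresh;
  }

  public static void main(String[] args) {
    DispatcherSwapSketch rm = new DispatcherSwapSketch();
    rm.dispatcher.start();
    rm.resetDispatcher();
    rm.dispatcher.stop();
  }
}

In the real ResourceManager the stop happens through the Service lifecycle; the sketch only shows why the ordering (stop the old dispatcher, then assign the new one) matters.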

View File

@ -23,10 +23,14 @@
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
+import java.util.Timer;
+import java.util.TimerTask;
 import java.util.concurrent.ConcurrentHashMap;

 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.service.AbstractService;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
@ -34,18 +38,25 @@
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerState;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
 import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
+import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
+import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
+import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMoveEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
+import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
+import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerFinishedEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerRecoverEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
@ -54,6 +65,7 @@
 import com.google.common.util.concurrent.SettableFuture;

 @SuppressWarnings("unchecked")
 public abstract class AbstractYarnScheduler
     <T extends SchedulerApplicationAttempt, N extends SchedulerNode>
@ -72,6 +84,7 @@ public abstract class AbstractYarnScheduler
   protected RMContext rmContext;
   protected Map<ApplicationId, SchedulerApplication<T>> applications;
+  protected int nmExpireInterval;

   protected final static List<Container> EMPTY_CONTAINER_LIST =
       new ArrayList<Container>();
@ -87,6 +100,15 @@ public AbstractYarnScheduler(String name) {
     super(name);
   }

+  @Override
+  public void serviceInit(Configuration conf) throws Exception {
+    nmExpireInterval =
+        conf.getInt(YarnConfiguration.RM_NM_EXPIRY_INTERVAL_MS,
+          YarnConfiguration.DEFAULT_RM_NM_EXPIRY_INTERVAL_MS);
+    createReleaseCache();
+    super.serviceInit(conf);
+  }
+
   public synchronized List<Container> getTransferredContainers(
       ApplicationAttemptId currentAttempt) {
     ApplicationId appId = currentAttempt.getApplicationId();
@ -281,6 +303,19 @@ public synchronized void recoverContainersOnNode(
           ((RMContainerImpl)rmContainer).setAMContainer(true);
         }
       }
+
+      synchronized (schedulerAttempt) {
+        Set<ContainerId> releases = schedulerAttempt.getPendingRelease();
+        if (releases.contains(container.getContainerId())) {
+          // release the container
+          rmContainer.handle(new RMContainerFinishedEvent(container
+            .getContainerId(), SchedulerUtils.createAbnormalContainerStatus(
+              container.getContainerId(), SchedulerUtils.RELEASED_CONTAINER),
+            RMContainerEventType.RELEASED));
+          releases.remove(container.getContainerId());
+          LOG.info(container.getContainerId() + " is released by application.");
+        }
+      }
     }
   }
@ -320,6 +355,62 @@ protected void recoverResourceRequestForContainer(RMContainer rmContainer) {
     }
   }

+  protected void createReleaseCache() {
+    // Cleanup the cache after nm expire interval.
+    new Timer().schedule(new TimerTask() {
+      @Override
+      public void run() {
+        for (SchedulerApplication<T> app : applications.values()) {
+          T attempt = app.getCurrentAppAttempt();
+          synchronized (attempt) {
+            for (ContainerId containerId : attempt.getPendingRelease()) {
+              RMAuditLogger.logFailure(
+                app.getUser(),
+                AuditConstants.RELEASE_CONTAINER,
+                "Unauthorized access or invalid container",
+                "Scheduler",
+                "Trying to release container not owned by app or with invalid id.",
+                attempt.getApplicationId(), containerId);
+            }
+            attempt.getPendingRelease().clear();
+          }
+        }
+        LOG.info("Release request cache is cleaned up");
+      }
+    }, nmExpireInterval);
+  }
+
+  // clean up a completed container
+  protected abstract void completedContainer(RMContainer rmContainer,
+      ContainerStatus containerStatus, RMContainerEventType event);
+
+  protected void releaseContainers(List<ContainerId> containers,
+      SchedulerApplicationAttempt attempt) {
+    for (ContainerId containerId : containers) {
+      RMContainer rmContainer = getRMContainer(containerId);
+      if (rmContainer == null) {
+        if (System.currentTimeMillis() - ResourceManager.getClusterTimeStamp()
+            < nmExpireInterval) {
+          LOG.info(containerId + " doesn't exist. Add the container"
+              + " to the release request cache as it maybe on recovery.");
+          synchronized (attempt) {
+            attempt.getPendingRelease().add(containerId);
+          }
+        } else {
+          RMAuditLogger.logFailure(attempt.getUser(),
+            AuditConstants.RELEASE_CONTAINER,
+            "Unauthorized access or invalid container", "Scheduler",
+            "Trying to release container not owned by app or with invalid id.",
+            attempt.getApplicationId(), containerId);
+        }
+      }
+      completedContainer(rmContainer,
+        SchedulerUtils.createAbnormalContainerStatus(containerId,
+          SchedulerUtils.RELEASED_CONTAINER), RMContainerEventType.RELEASED);
+    }
+  }
+
   public SchedulerNode getSchedulerNode(NodeId nodeId) {
     return nodes.get(nodeId);
   }
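For orientation, a hedged sketch of the idea behind releaseContainers() and createReleaseCache() above, using invented names rather than the real YARN types: a release request for a container the scheduler has not recovered yet is parked in a pending set, honored if the container later shows up during recovery, and the whole set is discarded once the NM-expiry window has passed.

import java.util.Set;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.ConcurrentHashMap;

public class ReleaseCacheSketch {

  private final Set<String> pendingRelease = ConcurrentHashMap.newKeySet();
  private final long nmExpireIntervalMs;
  private final long clusterStartMs = System.currentTimeMillis();

  ReleaseCacheSketch(long nmExpireIntervalMs) {
    this.nmExpireIntervalMs = nmExpireIntervalMs;
    // One-shot cleanup after the expiry window, mirroring createReleaseCache().
    new Timer(true).schedule(new TimerTask() {
      @Override
      public void run() {
        pendingRelease.clear();
      }
    }, nmExpireIntervalMs);
  }

  // Called for each container id the AM asks to release.
  void release(String containerId, boolean knownToScheduler) {
    if (knownToScheduler) {
      return; // the container would be completed immediately
    }
    if (System.currentTimeMillis() - clusterStartMs < nmExpireIntervalMs) {
      // Unknown container during the recovery window: remember the request.
      pendingRelease.add(containerId);
    }
    // After the window, an unknown id would only be logged as invalid.
  }

  // Called when an NM reports a running container during recovery.
  boolean shouldSkipRecovery(String containerId) {
    return pendingRelease.remove(containerId);
  }

  public static void main(String[] args) {
    ReleaseCacheSketch cache = new ReleaseCacheSketch(8000);
    cache.release("container_1_0001_01_000002", false);
    System.out.println(cache.shouldSkipRecovery("container_1_0001_01_000002")); // true
  }
}

The real scheduler keys the set by ContainerId per application attempt and guards it with the attempt lock; this sketch only shows the park-then-expire shape of the cache.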

View File

@ -17,13 +17,14 @@
  */
 package org.apache.hadoop.yarn.server.resourcemanager.scheduler;

-import com.google.common.base.Preconditions;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;

 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@ -41,7 +42,6 @@
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
-import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
@ -53,6 +53,7 @@
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent;
 import org.apache.hadoop.yarn.util.resource.Resources;

+import com.google.common.base.Preconditions;
 import com.google.common.collect.HashMultiset;
 import com.google.common.collect.Multiset;
@ -87,6 +88,13 @@ public class SchedulerApplicationAttempt {
   protected List<RMContainer> newlyAllocatedContainers =
       new ArrayList<RMContainer>();

+  // This pendingRelease is used in work-preserving recovery scenario to keep
+  // track of the AM's outstanding release requests. RM on recovery could
+  // receive the release request form AM before it receives the container status
+  // from NM for recovery. In this case, the to-be-recovered containers reported
+  // by NM should not be recovered.
+  private Set<ContainerId> pendingRelease = null;
+
   /**
    * Count how many times the application has been given an opportunity
    * to schedule a task at each priority. Each time the scheduler
@ -114,7 +122,7 @@ public SchedulerApplicationAttempt(ApplicationAttemptId applicationAttemptId,
         new AppSchedulingInfo(applicationAttemptId, user, queue,
             activeUsersManager, rmContext.getEpoch());
     this.queue = queue;
+    this.pendingRelease = new HashSet<ContainerId>();
     if (rmContext.getRMApps() != null &&
         rmContext.getRMApps()
             .containsKey(applicationAttemptId.getApplicationId())) {
@ -163,6 +171,10 @@ public Map<String, ResourceRequest> getResourceRequests(Priority priority) {
     return appSchedulingInfo.getResourceRequests(priority);
   }

+  public Set<ContainerId> getPendingRelease() {
+    return this.pendingRelease;
+  }
+
   public int getNewContainerId() {
     return appSchedulingInfo.getNewContainerId();
   }

View File

@ -54,8 +54,6 @@
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
-import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
-import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.*;
@ -199,7 +197,7 @@ public Configuration getConf() {
   private static final long DEFAULT_ASYNC_SCHEDULER_INTERVAL = 5;

   private boolean overrideWithQueueMappings = false;
-  private List<QueueMapping> mappings = new ArrayList<QueueMapping>();
+  private List<QueueMapping> mappings = null;
   private Groups groups;

   @VisibleForTesting
@ -789,21 +787,7 @@ ask, getResourceCalculator(), getClusterResource(),
         getMinimumResourceCapability(), maximumAllocation);

     // Release containers
-    for (ContainerId releasedContainerId : release) {
-      RMContainer rmContainer = getRMContainer(releasedContainerId);
-      if (rmContainer == null) {
-        RMAuditLogger.logFailure(application.getUser(),
-            AuditConstants.RELEASE_CONTAINER,
-            "Unauthorized access or invalid container", "CapacityScheduler",
-            "Trying to release container not owned by app or with invalid id",
-            application.getApplicationId(), releasedContainerId);
-      }
-      completedContainer(rmContainer,
-          SchedulerUtils.createAbnormalContainerStatus(
-              releasedContainerId,
-              SchedulerUtils.RELEASED_CONTAINER),
-          RMContainerEventType.RELEASED);
-    }
+    releaseContainers(release, application);

     synchronized (application) {
@ -1098,7 +1082,8 @@ private synchronized void removeNode(RMNode nodeInfo) {
   }

   @Lock(CapacityScheduler.class)
-  private synchronized void completedContainer(RMContainer rmContainer,
+  @Override
+  protected synchronized void completedContainer(RMContainer rmContainer,
       ContainerStatus containerStatus, RMContainerEventType event) {
     if (rmContainer == null) {
       LOG.info("Null container completed...");

View File

@ -49,8 +49,6 @@
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
-import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
-import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceWeights;
@ -810,7 +808,8 @@ private synchronized void removeApplicationAttempt(
   /**
    * Clean up a completed container.
    */
-  private synchronized void completedContainer(RMContainer rmContainer,
+  @Override
+  protected synchronized void completedContainer(RMContainer rmContainer,
       ContainerStatus containerStatus, RMContainerEventType event) {
     if (rmContainer == null) {
       LOG.info("Null container completed...");
@ -913,21 +912,7 @@ public Allocation allocate(ApplicationAttemptId appAttemptId,
     }

     // Release containers
-    for (ContainerId releasedContainerId : release) {
-      RMContainer rmContainer = getRMContainer(releasedContainerId);
-      if (rmContainer == null) {
-        RMAuditLogger.logFailure(application.getUser(),
-            AuditConstants.RELEASE_CONTAINER,
-            "Unauthorized access or invalid container", "FairScheduler",
-            "Trying to release container not owned by app or with invalid id",
-            application.getApplicationId(), releasedContainerId);
-      }
-      completedContainer(rmContainer,
-          SchedulerUtils.createAbnormalContainerStatus(
-              releasedContainerId,
-              SchedulerUtils.RELEASED_CONTAINER),
-          RMContainerEventType.RELEASED);
-    }
+    releaseContainers(release, application);

     synchronized (application) {
       if (!ask.isEmpty()) {

View File

@ -52,8 +52,6 @@
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
-import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
-import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
@ -89,7 +87,6 @@
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
 import org.apache.hadoop.yarn.server.utils.Lock;
 import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
@ -295,21 +292,7 @@ public Allocation allocate(
         clusterResource, minimumAllocation, maximumAllocation);

     // Release containers
-    for (ContainerId releasedContainer : release) {
-      RMContainer rmContainer = getRMContainer(releasedContainer);
-      if (rmContainer == null) {
-        RMAuditLogger.logFailure(application.getUser(),
-            AuditConstants.RELEASE_CONTAINER,
-            "Unauthorized access or invalid container", "FifoScheduler",
-            "Trying to release container not owned by app or with invalid id",
-            application.getApplicationId(), releasedContainer);
-      }
-      containerCompleted(rmContainer,
-          SchedulerUtils.createAbnormalContainerStatus(
-              releasedContainer,
-              SchedulerUtils.RELEASED_CONTAINER),
-          RMContainerEventType.RELEASED);
-    }
+    releaseContainers(release, application);

     synchronized (application) {
@ -443,7 +426,7 @@ private synchronized void doneApplicationAttempt(
         LOG.info("Skip killing " + container.getContainerId());
         continue;
       }
-      containerCompleted(container,
+      completedContainer(container,
         SchedulerUtils.createAbnormalContainerStatus(
           container.getContainerId(), SchedulerUtils.COMPLETED_APPLICATION),
         RMContainerEventType.KILL);
@ -717,7 +700,7 @@ private synchronized void nodeUpdate(RMNode rmNode) {
     for (ContainerStatus completedContainer : completedContainers) {
       ContainerId containerId = completedContainer.getContainerId();
       LOG.debug("Container FINISHED: " + containerId);
-      containerCompleted(getRMContainer(containerId),
+      completedContainer(getRMContainer(containerId),
           completedContainer, RMContainerEventType.FINISHED);
     }
@ -818,7 +801,7 @@ public void handle(SchedulerEvent event) {
       ContainerExpiredSchedulerEvent containerExpiredEvent =
           (ContainerExpiredSchedulerEvent) event;
       ContainerId containerid = containerExpiredEvent.getContainerId();
-      containerCompleted(getRMContainer(containerid),
+      completedContainer(getRMContainer(containerid),
           SchedulerUtils.createAbnormalContainerStatus(
               containerid,
               SchedulerUtils.EXPIRED_CONTAINER),
@ -831,7 +814,8 @@ public void handle(SchedulerEvent event) {
   }

   @Lock(FifoScheduler.class)
-  private synchronized void containerCompleted(RMContainer rmContainer,
+  @Override
+  protected synchronized void completedContainer(RMContainer rmContainer,
       ContainerStatus containerStatus, RMContainerEventType event) {
     if (rmContainer == null) {
       LOG.info("Null container completed...");
@ -881,7 +865,7 @@ private synchronized void removeNode(RMNode nodeInfo) {
     }
     // Kill running containers
     for(RMContainer container : node.getRunningContainers()) {
-      containerCompleted(container,
+      completedContainer(container,
           SchedulerUtils.createAbnormalContainerStatus(
               container.getContainerId(),
              SchedulerUtils.LOST_CONTAINER),

View File

@ -49,7 +49,7 @@ public class MockAM {
   private volatile int responseId = 0;
   private final ApplicationAttemptId attemptId;
-  private final RMContext context;
+  private RMContext context;
   private ApplicationMasterProtocol amRMProtocol;

   private final List<ResourceRequest> requests = new ArrayList<ResourceRequest>();
@ -61,8 +61,10 @@ public MockAM(RMContext context, ApplicationMasterProtocol amRMProtocol,
     this.amRMProtocol = amRMProtocol;
     this.attemptId = attemptId;
   }

-  void setAMRMProtocol(ApplicationMasterProtocol amRMProtocol) {
+  public void setAMRMProtocol(ApplicationMasterProtocol amRMProtocol,
+      RMContext context) {
+    this.context = context;
     this.amRMProtocol = amRMProtocol;
   }

View File

@ -171,7 +171,6 @@ public void testProgressFilter() throws Exception{
     RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
     MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
     am1.registerAppAttempt();
-    am1.setAMRMProtocol(rm.getApplicationMasterService());

     AllocateRequestPBImpl allocateRequest = new AllocateRequestPBImpl();
     List<ContainerId> release = new ArrayList<ContainerId>();

View File

@ -331,6 +331,10 @@ protected Dispatcher createDispatcher() {
     rm.adminService.transitionToStandby(requestInfo);
     rm.adminService.transitionToActive(requestInfo);
     rm.adminService.transitionToStandby(requestInfo);
+
+    MyCountingDispatcher dispatcher =
+        (MyCountingDispatcher) rm.getRMContext().getDispatcher();
+    assertTrue(!dispatcher.isStopped());
+
     rm.adminService.transitionToActive(requestInfo);
     assertEquals(errorMessageForEventHandler, expectedEventHandlerCount,
@ -339,6 +343,11 @@ protected Dispatcher createDispatcher() {
     assertEquals(errorMessageForService, expectedServiceCount,
         rm.getServices().size());

+    // Keep the dispatcher reference before transitioning to standby
+    dispatcher = (MyCountingDispatcher) rm.getRMContext().getDispatcher();
+
     rm.adminService.transitionToStandby(requestInfo);
     assertEquals(errorMessageForEventHandler, expectedEventHandlerCount,
         ((MyCountingDispatcher) rm.getRMContext().getDispatcher())
@ -346,6 +355,8 @@ protected Dispatcher createDispatcher() {
     assertEquals(errorMessageForService, expectedServiceCount,
         rm.getServices().size());

+    assertTrue(dispatcher.isStopped());
+
     rm.stop();
   }
@ -492,6 +503,8 @@ class MyCountingDispatcher extends AbstractService implements Dispatcher {
     private int eventHandlerCount;

+    private volatile boolean stopped = false;
+
     public MyCountingDispatcher() {
       super("MyCountingDispatcher");
       this.eventHandlerCount = 0;
@ -510,5 +523,15 @@ public void register(Class<? extends Enum> eventType, EventHandler handler) {
     public int getEventHandlerCount() {
       return this.eventHandlerCount;
     }
+
+    @Override
+    protected void serviceStop() throws Exception {
+      this.stopped = true;
+      super.serviceStop();
+    }
+
+    public boolean isStopped() {
+      return this.stopped;
+    }
   }
 }
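The MyCountingDispatcher change above boils down to a stop-tracking test double. A hedged, self-contained sketch of that trick (invented base class, not the Hadoop service framework): the double records that its stop hook ran, so a test can assert the ResourceManager really stopped the old dispatcher during the HA transitions.

public class StopTrackingSketch {

  // Stand-in for a service base class with a stop hook.
  static class SketchService {
    protected void serviceStop() {}
    final void stop() { serviceStop(); }
  }

  // Test double: remembers that serviceStop() was invoked.
  static class StopTrackingService extends SketchService {
    private volatile boolean stopped = false;

    @Override
    protected void serviceStop() {
      stopped = true;
      super.serviceStop();
    }

    boolean isStopped() {
      return stopped;
    }
  }

  public static void main(String[] args) {
    StopTrackingService svc = new StopTrackingService();
    System.out.println(svc.isStopped()); // false: not yet stopped
    svc.stop();
    System.out.println(svc.isStopped()); // true: stop hook ran
  }
}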

View File

@ -289,7 +289,7 @@ public void testRMRestart() throws Exception {
     // verify old AM is not accepted
     // change running AM to talk to new RM
-    am1.setAMRMProtocol(rm2.getApplicationMasterService());
+    am1.setAMRMProtocol(rm2.getApplicationMasterService(), rm2.getRMContext());
     AllocateResponse allocResponse = am1.allocate(
         new ArrayList<ResourceRequest>(),
         new ArrayList<ContainerId>());
@ -1663,7 +1663,7 @@ public void testQueueMetricsOnRMRestart() throws Exception {
     nm1.setResourceTrackerService(rm2.getResourceTrackerService());
     // recover app
     RMApp loadedApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
-    am1.setAMRMProtocol(rm2.getApplicationMasterService());
+    am1.setAMRMProtocol(rm2.getApplicationMasterService(), rm2.getRMContext());
     am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>());
     nm1.nodeHeartbeat(true);
     nm1 = new MockNM("127.0.0.1:1234", 15120, rm2.getResourceTrackerService());

View File

@ -33,10 +33,13 @@
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerState;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
@ -72,6 +75,9 @@
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;

+import com.google.common.base.Supplier;
+
 @SuppressWarnings({"rawtypes", "unchecked"})
 @RunWith(value = Parameterized.class)
 public class TestWorkPreservingRMRestart {
@ -572,8 +578,8 @@ public void testAppReregisterOnRMWorkPreservingRestart() throws Exception {
     rm2.waitForState(app0.getApplicationId(), RMAppState.ACCEPTED);
     rm2.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.LAUNCHED);

-    am0.setAMRMProtocol(rm2.getApplicationMasterService());
-    am0.registerAppAttempt(false);
+    am0.setAMRMProtocol(rm2.getApplicationMasterService(), rm2.getRMContext());
+    am0.registerAppAttempt(true);

     rm2.waitForState(app0.getApplicationId(), RMAppState.RUNNING);
     rm2.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.RUNNING);
@ -646,6 +652,69 @@ public void testRecoverSchedulerAppAndAttemptSynchronously() throws Exception {
     waitForNumContainersToRecover(2, rm2, am0.getApplicationAttemptId());
   }

+  // Test if RM on recovery receives the container release request from AM
+  // before it receives the container status reported by NM for recovery. this
+  // container should not be recovered.
+  @Test (timeout = 30000)
+  public void testReleasedContainerNotRecovered() throws Exception {
+    MemoryRMStateStore memStore = new MemoryRMStateStore();
+    memStore.init(conf);
+    rm1 = new MockRM(conf, memStore);
+    MockNM nm1 = new MockNM("h1:1234", 15120, rm1.getResourceTrackerService());
+    nm1.registerNode();
+    rm1.start();
+
+    RMApp app1 = rm1.submitApp(1024);
+    final MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
+
+    // Re-start RM
+    conf.setInt(YarnConfiguration.RM_NM_EXPIRY_INTERVAL_MS, 8000);
+    rm2 = new MockRM(conf, memStore);
+    rm2.start();
+    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
+    rm2.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
+    am1.setAMRMProtocol(rm2.getApplicationMasterService(), rm2.getRMContext());
+    am1.registerAppAttempt(true);
+
+    // try to release a container before the container is actually recovered.
+    final ContainerId runningContainer =
+        ContainerId.newInstance(am1.getApplicationAttemptId(), 2);
+    am1.allocate(null, Arrays.asList(runningContainer));
+
+    // send container statuses to recover the containers
+    List<NMContainerStatus> containerStatuses =
+        createNMContainerStatusForApp(am1);
+    nm1.registerNode(containerStatuses, null);
+
+    // only the am container should be recovered.
+    waitForNumContainersToRecover(1, rm2, am1.getApplicationAttemptId());
+
+    final AbstractYarnScheduler scheduler =
+        (AbstractYarnScheduler) rm2.getResourceScheduler();
+    // cached release request is cleaned.
+    // assertFalse(scheduler.getPendingRelease().contains(runningContainer));
+
+    AllocateResponse response = am1.allocate(null, null);
+    // AM gets notified of the completed container.
+    boolean receivedCompletedContainer = false;
+    for (ContainerStatus status : response.getCompletedContainersStatuses()) {
+      if (status.getContainerId().equals(runningContainer)) {
+        receivedCompletedContainer = true;
+      }
+    }
+    assertTrue(receivedCompletedContainer);
+
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      public Boolean get() {
+        // release cache is cleaned up and previous running container is not
+        // recovered
+        return scheduler.getApplicationAttempt(am1.getApplicationAttemptId())
+          .getPendingRelease().isEmpty()
+            && scheduler.getRMContainer(runningContainer) == null;
+      }
+    }, 1000, 20000);
+  }
+
   private void asserteMetrics(QueueMetrics qm, int appsSubmitted,
       int appsPending, int appsRunning, int appsCompleted,
       int allocatedContainers, int availableMB, int availableVirtualCores,
@ -661,7 +730,7 @@ private void asserteMetrics(QueueMetrics qm, int appsSubmitted,
     assertEquals(allocatedVirtualCores, qm.getAllocatedVirtualCores());
   }

-  private void waitForNumContainersToRecover(int num, MockRM rm,
+  public static void waitForNumContainersToRecover(int num, MockRM rm,
       ApplicationAttemptId attemptId) throws Exception {
     AbstractYarnScheduler scheduler =
         (AbstractYarnScheduler) rm.getResourceScheduler();
@ -674,7 +743,9 @@ private void waitForNumContainersToRecover(int num, MockRM rm,
       attempt = scheduler.getApplicationAttempt(attemptId);
     }
     while (attempt.getLiveContainers().size() < num) {
-      System.out.println("Wait for " + num + " containers to recover.");
+      System.out.println("Wait for " + num
+          + " containers to recover. currently: "
+          + attempt.getLiveContainers().size());
       Thread.sleep(200);
     }
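The new test above leans on GenericTestUtils.waitFor, which polls a Supplier<Boolean> until it returns true or a timeout expires. A hedged stand-alone sketch of that poll-until-true pattern (invented helper, not the Hadoop test utility):

import java.util.function.BooleanSupplier;

public class WaitForSketch {

  static void waitFor(BooleanSupplier check, long checkEveryMs, long timeoutMs)
      throws InterruptedException {
    long deadline = System.currentTimeMillis() + timeoutMs;
    while (!check.getAsBoolean()) {
      if (System.currentTimeMillis() > deadline) {
        throw new IllegalStateException(
            "Timed out after " + timeoutMs + " ms waiting for condition");
      }
      Thread.sleep(checkEveryMs);
    }
  }

  public static void main(String[] args) throws InterruptedException {
    long start = System.currentTimeMillis();
    // Example condition: at least 50 ms have elapsed.
    waitFor(() -> System.currentTimeMillis() - start >= 50, 10, 1000);
    System.out.println("condition met");
  }
}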
} }