diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop b/hadoop-common-project/hadoop-common/src/main/bin/hadoop
index 6cf872c5eb..b57a4c1305 100755
--- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop
+++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop
@@ -28,7 +28,7 @@ function hadoop_usage
   hadoop_add_option "hostnames list[,of,host,names]" "hosts to use in slave mode"
   hadoop_add_option "loglevel level" "set the log4j level for this command"
   hadoop_add_option "hosts filename" "list of hosts to use in slave mode"
-  hadoop_add_option "slaves" "turn on slave mode"
+  hadoop_add_option "workers" "turn on worker mode"
 
   hadoop_add_subcommand "checknative" "check native Hadoop and compression libraries availability"
   hadoop_add_subcommand "classpath" "prints the class path needed to get the Hadoop jar and the required libraries"
@@ -205,8 +205,8 @@ fi
 
 hadoop_verify_user "${HADOOP_SUBCMD}"
 
-if [[ ${HADOOP_SLAVE_MODE} = true ]]; then
-  hadoop_common_slave_mode_execute "${HADOOP_COMMON_HOME}/bin/hadoop" "${HADOOP_USER_PARAMS[@]}"
+if [[ ${HADOOP_WORKER_MODE} = true ]]; then
+  hadoop_common_worker_mode_execute "${HADOOP_COMMON_HOME}/bin/hadoop" "${HADOOP_USER_PARAMS[@]}"
   exit $?
 fi
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.cmd b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.cmd
index 8d4b897eaa..d77dc5346a 100644
--- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.cmd
+++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.cmd
@@ -80,12 +80,12 @@ if "%1" == "--config" (
 )
 
 @rem
-@rem check to see it is specified whether to use the slaves or the
+@rem check to see it is specified whether to use the workers or the
 @rem masters file
 @rem
 
 if "%1" == "--hosts" (
-  set HADOOP_SLAVES=%HADOOP_CONF_DIR%\%2
+  set HADOOP_WORKERS=%HADOOP_CONF_DIR%\%2
   shift
   shift
 )
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh
index 104247adf7..ba8d69d382 100755
--- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh
+++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh
@@ -113,6 +113,10 @@ hadoop_exec_userfuncs
 hadoop_exec_user_hadoopenv
 hadoop_verify_confdir
 
+hadoop_deprecate_envvar HADOOP_SLAVES HADOOP_WORKERS
+hadoop_deprecate_envvar HADOOP_SLAVE_NAMES HADOOP_WORKER_NAMES
+hadoop_deprecate_envvar HADOOP_SLAVE_SLEEP HADOOP_WORKER_SLEEP
+
 # do all the OS-specific startup bits here
 # this allows us to get a decent JAVA_HOME,
 # call crle for LD_LIBRARY_PATH, etc.
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh
index ae1e324823..55304916ad 100755
--- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh
+++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh
@@ -57,13 +57,13 @@ else
 fi
 
 hadoop_error "WARNING: Use of this script to ${daemonmode} HDFS daemons is deprecated."
-hadoop_error "WARNING: Attempting to execute replacement \"hdfs --slaves --daemon ${daemonmode}\" instead."
+hadoop_error "WARNING: Attempting to execute replacement \"hdfs --workers --daemon ${daemonmode}\" instead."
 
 #
 # Original input was usually:
 #  hadoop-daemons.sh (shell options) (start|stop) (datanode|...) (daemon options)
 # we're going to turn this into
-#  hdfs --slaves --daemon (start|stop) (rest of options)
+#  hdfs --workers --daemon (start|stop) (rest of options)
 #
 for (( i = 0; i < ${#HADOOP_USER_PARAMS[@]}; i++ ))
 do
@@ -74,4 +74,4 @@ do
   fi
 done
 
-${hdfsscript} --slaves --daemon "${daemonmode}" "${HADOOP_USER_PARAMS[@]}"
+${hdfsscript} --workers --daemon "${daemonmode}" "${HADOOP_USER_PARAMS[@]}"
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh
index 99c47384bd..c38059903b 100755
--- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh
+++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh
@@ -602,25 +602,25 @@ function hadoop_basic_init
   HADOOP_SSH_PARALLEL=${HADOOP_SSH_PARALLEL:-10}
 }
 
-## @description Set the slave support information to the contents
+## @description Set the worker support information to the contents
 ## @description of `filename`
 ## @audience public
 ## @stability stable
 ## @replaceable no
 ## @param filename
 ## @return will exit if file does not exist
-function hadoop_populate_slaves_file
+function hadoop_populate_workers_file
 {
-  local slavesfile=$1
+  local workersfile=$1
   shift
-  if [[ -f "${slavesfile}" ]]; then
+  if [[ -f "${workersfile}" ]]; then
     # shellcheck disable=2034
-    HADOOP_SLAVES="${slavesfile}"
-  elif [[ -f "${HADOOP_CONF_DIR}/${slavesfile}" ]]; then
+    HADOOP_WORKERS="${workersfile}"
+  elif [[ -f "${HADOOP_CONF_DIR}/${workersfile}" ]]; then
     # shellcheck disable=2034
-    HADOOP_SLAVES="${HADOOP_CONF_DIR}/${slavesfile}"
+    HADOOP_WORKERS="${HADOOP_CONF_DIR}/${workersfile}"
   else
-    hadoop_error "ERROR: Cannot find hosts file \"${slavesfile}\""
+    hadoop_error "ERROR: Cannot find hosts file \"${workersfile}\""
     hadoop_exit_with_usage 1
   fi
 }
@@ -669,14 +669,14 @@ function hadoop_actual_ssh
 {
   # we are passing this function to xargs
   # should get hostname followed by rest of command line
-  local slave=$1
+  local worker=$1
   shift
 
   # shellcheck disable=SC2086
-  ssh ${HADOOP_SSH_OPTS} ${slave} $"${@// /\\ }" 2>&1 | sed "s/^/$slave: /"
+  ssh ${HADOOP_SSH_OPTS} ${worker} $"${@// /\\ }" 2>&1 | sed "s/^/$worker: /"
 }
 
-## @description Connect to ${HADOOP_SLAVES} or ${HADOOP_SLAVE_NAMES}
+## @description Connect to ${HADOOP_WORKERS} or ${HADOOP_WORKER_NAMES}
 ## @description and execute command.
 ## @audience private
 ## @stability evolving
@@ -687,45 +687,52 @@ function hadoop_connect_to_hosts
 {
   # shellcheck disable=SC2124
   local params="$@"
-  local slave_file
+  local worker_file
   local tmpslvnames
 
   #
   # ssh (or whatever) to a host
   #
   # User can specify hostnames or a file where the hostnames are (not both)
-  if [[ -n "${HADOOP_SLAVES}" && -n "${HADOOP_SLAVE_NAMES}" ]] ; then
-    hadoop_error "ERROR: Both HADOOP_SLAVES and HADOOP_SLAVE_NAME were defined. Aborting."
+  if [[ -n "${HADOOP_WORKERS}" && -n "${HADOOP_WORKER_NAMES}" ]] ; then
+    hadoop_error "ERROR: Both HADOOP_WORKERS and HADOOP_WORKER_NAME were defined. Aborting."
     exit 1
-  elif [[ -z "${HADOOP_SLAVE_NAMES}" ]]; then
-    slave_file=${HADOOP_SLAVES:-${HADOOP_CONF_DIR}/slaves}
+  elif [[ -z "${HADOOP_WORKER_NAMES}" ]]; then
+    if [[ -n "${HADOOP_WORKERS}" ]]; then
+      worker_file=${HADOOP_WORKERS}
+    elif [[ -f "${HADOOP_CONF_DIR}/workers" ]]; then
+      worker_file=${HADOOP_CONF_DIR}/workers
+    elif [[ -f "${HADOOP_CONF_DIR}/slaves" ]]; then
+      hadoop_error "WARNING: 'slaves' file has been deprecated. Please use 'workers' file instead."
+      worker_file=${HADOOP_CONF_DIR}/slaves
+    fi
   fi
 
   # if pdsh is available, let's use it.  otherwise default
   # to a loop around ssh.  (ugh)
   if [[ -e '/usr/bin/pdsh' ]]; then
-    if [[ -z "${HADOOP_SLAVE_NAMES}" ]] ; then
+    if [[ -z "${HADOOP_WORKER_NAMES}" ]] ; then
       # if we were given a file, just let pdsh deal with it.
       # shellcheck disable=SC2086
       PDSH_SSH_ARGS_APPEND="${HADOOP_SSH_OPTS}" pdsh \
-        -f "${HADOOP_SSH_PARALLEL}" -w ^"${slave_file}" $"${@// /\\ }" 2>&1
+        -f "${HADOOP_SSH_PARALLEL}" -w ^"${worker_file}" $"${@// /\\ }" 2>&1
     else
       # no spaces allowed in the pdsh arg host list
       # shellcheck disable=SC2086
-      tmpslvnames=$(echo ${SLAVE_NAMES} | tr -s ' ' ,)
+      tmpslvnames=$(echo ${HADOOP_WORKER_NAMES} | tr -s ' ' ,)
       PDSH_SSH_ARGS_APPEND="${HADOOP_SSH_OPTS}" pdsh \
         -f "${HADOOP_SSH_PARALLEL}" \
         -w "${tmpslvnames}" $"${@// /\\ }" 2>&1
     fi
   else
-    if [[ -z "${HADOOP_SLAVE_NAMES}" ]]; then
-      HADOOP_SLAVE_NAMES=$(sed 's/#.*$//;/^$/d' "${slave_file}")
+    if [[ -z "${HADOOP_WORKER_NAMES}" ]]; then
+      HADOOP_WORKER_NAMES=$(sed 's/#.*$//;/^$/d' "${worker_file}")
    fi
     hadoop_connect_to_hosts_without_pdsh "${params}"
  fi
 }
 
-## @description Connect to ${SLAVE_NAMES} and execute command
+## @description Connect to ${HADOOP_WORKER_NAMES} and execute command
 ## @description under the environment which does not support pdsh.
 ## @audience private
 ## @stability evolving
@@ -736,24 +743,24 @@ function hadoop_connect_to_hosts_without_pdsh
 {
   # shellcheck disable=SC2124
   local params="$@"
-  local slaves=(${HADOOP_SLAVE_NAMES})
-  for (( i = 0; i < ${#slaves[@]}; i++ ))
+  local workers=(${HADOOP_WORKER_NAMES})
+  for (( i = 0; i < ${#workers[@]}; i++ ))
   do
     if (( i != 0 && i % HADOOP_SSH_PARALLEL == 0 )); then
       wait
     fi
     # shellcheck disable=SC2086
-    hadoop_actual_ssh "${slaves[$i]}" ${params} &
+    hadoop_actual_ssh "${workers[$i]}" ${params} &
   done
   wait
 }
 
-## @description Utility routine to handle --slaves mode
+## @description Utility routine to handle --workers mode
 ## @audience private
 ## @stability evolving
 ## @replaceable yes
 ## @param commandarray
-function hadoop_common_slave_mode_execute
+function hadoop_common_worker_mode_execute
 {
   #
   # input should be the command line as given by the user
@@ -761,13 +768,13 @@ function hadoop_common_slave_mode_execute
   #
   local argv=("$@")
 
-  # if --slaves is still on the command line, remove it
+  # if --workers is still on the command line, remove it
   # to prevent loops
   # Also remove --hostnames and --hosts along with arg values
   local argsSize=${#argv[@]};
   for (( i = 0; i < argsSize; i++ ))
   do
-    if [[ "${argv[$i]}" =~ ^--slaves$ ]]; then
+    if [[ "${argv[$i]}" =~ ^--workers$ ]]; then
       unset argv[$i]
     elif [[ "${argv[$i]}" =~ ^--hostnames$ ]] ||
       [[ "${argv[$i]}" =~ ^--hosts$ ]]; then
@@ -2051,13 +2058,13 @@ function hadoop_parse_args
     --hostnames)
       shift
       # shellcheck disable=SC2034
-      HADOOP_SLAVE_NAMES="$1"
+      HADOOP_WORKER_NAMES="$1"
       shift
       ((HADOOP_PARSE_COUNTER=HADOOP_PARSE_COUNTER+2))
     ;;
     --hosts)
       shift
-      hadoop_populate_slaves_file "$1"
+      hadoop_populate_workers_file "$1"
       shift
       ((HADOOP_PARSE_COUNTER=HADOOP_PARSE_COUNTER+2))
     ;;
@@ -2068,10 +2075,10 @@ function hadoop_parse_args
       shift
       ((HADOOP_PARSE_COUNTER=HADOOP_PARSE_COUNTER+2))
     ;;
-    --slaves)
+    --workers)
       shift
       # shellcheck disable=SC2034
-      HADOOP_SLAVE_MODE=true
+      HADOOP_WORKER_MODE=true
       ((HADOOP_PARSE_COUNTER=HADOOP_PARSE_COUNTER+1))
     ;;
     *)
@@ -2104,4 +2111,4 @@ function hadoop_xml_escape
 function hadoop_sed_escape
 {
   sed -e 's/[\/&]/\\&/g' <<< "$1"
-}
\ No newline at end of file
+}
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/slaves.sh b/hadoop-common-project/hadoop-common/src/main/bin/workers.sh
similarity index 86%
rename from hadoop-common-project/hadoop-common/src/main/bin/slaves.sh
rename to hadoop-common-project/hadoop-common/src/main/bin/workers.sh
index 34bf0ebb2b..84ffabd857 100755
--- a/hadoop-common-project/hadoop-common/src/main/bin/slaves.sh
+++ b/hadoop-common-project/hadoop-common/src/main/bin/workers.sh
@@ -16,20 +16,20 @@
 #   limitations under the License.
 
 
-# Run a shell command on all slave hosts.
+# Run a shell command on all worker hosts.
 #
 # Environment Variables
 #
-#   HADOOP_SLAVES    File naming remote hosts.
-#     Default is ${HADOOP_CONF_DIR}/slaves.
+#   HADOOP_WORKERS    File naming remote hosts.
+#     Default is ${HADOOP_CONF_DIR}/workers.
 #   HADOOP_CONF_DIR  Alternate conf dir. Default is ${HADOOP_HOME}/conf.
-#   HADOOP_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
+#   HADOOP_WORKER_SLEEP Seconds to sleep between spawning remote commands.
 #   HADOOP_SSH_OPTS Options passed to ssh when running remote commands.
 ##
 
 function hadoop_usage
 {
-  echo "Usage: slaves.sh [--config confdir] command..."
+  echo "Usage: workers.sh [--config confdir] command..."
 }
 
 # let's locate libexec...
diff --git a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh
index 3f19e459c4..6565d1d6a7 100644
--- a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh
+++ b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh
@@ -169,8 +169,8 @@ esac
 # export HADOOP_SSH_PARALLEL=10
 
 # Filename which contains all of the hosts for any remote execution
-# helper scripts # such as slaves.sh, start-dfs.sh, etc.
-# export HADOOP_SLAVES="${HADOOP_CONF_DIR}/slaves"
+# helper scripts # such as workers.sh, start-dfs.sh, etc.
+# export HADOOP_WORKERS="${HADOOP_CONF_DIR}/workers"
 
 ###
 # Options for all daemons
diff --git a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example
index 91a2d8bdf6..0eeae3c804 100644
--- a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example
+++ b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-user-functions.sh.example
@@ -87,7 +87,7 @@
 #}
 
 #
-# Example: efficient command execution for the slaves
+# Example: efficient command execution for the workers
 #
 # To improve performance, you can use xargs -P
 # instead of the for loop, if supported.
@@ -108,7 +108,7 @@
 #  # list with each hostname read from stdin/pipe. But it consider one
 #  # line as one argument while reading from stdin/pipe. So place each
 #  # hostname in different lines while passing via pipe.
-#  tmpslvnames=$(echo "${HADOOP_SLAVE_NAMES}" | tr ' ' '\n' )
+#  tmpslvnames=$(echo "${HADOOP_WORKER_NAMES}" | tr ' ' '\n' )
 #  echo "${tmpslvnames}" | \
 #  xargs -n 1 -P"${HADOOP_SSH_PARALLEL}" \
 #  -I {} bash -c -- "hadoop_actual_ssh {} ${params}"
diff --git a/hadoop-yarn-project/hadoop-yarn/conf/slaves b/hadoop-common-project/hadoop-common/src/main/conf/workers
similarity index 100%
rename from hadoop-yarn-project/hadoop-yarn/conf/slaves
rename to hadoop-common-project/hadoop-common/src/main/conf/workers
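A minimal usage sketch (not part of the patch itself) of how the renamed pieces above fit together; the hostnames and the datanode command are illustrative only:

# Worker host resolution in hadoop_connect_to_hosts, per the hunks above:
#   1. an explicit list via --hostnames (HADOOP_WORKER_NAMES) wins when set;
#   2. otherwise HADOOP_WORKERS (also set by --hosts <filename>) names the hosts file;
#   3. otherwise ${HADOOP_CONF_DIR}/workers is read;
#   4. a legacy ${HADOOP_CONF_DIR}/slaves file still works, with a deprecation warning.
printf 'worker01.example.com\nworker02.example.com\n' > "${HADOOP_CONF_DIR}/workers"
hdfs --workers --daemon start datanode   # the invocation hadoop-daemons.sh now delegates to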
diff --git a/hadoop-common-project/hadoop-common/src/main/java/overview.html b/hadoop-common-project/hadoop-common/src/main/java/overview.html
index 5868617709..2c64121831 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/overview.html
+++ b/hadoop-common-project/hadoop-common/src/main/java/overview.html
@@ -23,33 +23,33 @@
 Hadoop is a distributed computing platform.
 
-Hadoop primarily consists of the Hadoop Distributed FileSystem
-(HDFS) and an
+Hadoop primarily consists of the Hadoop Distributed FileSystem
+(HDFS) and an
 implementation of the Map-Reduce programming paradigm.
 
-Hadoop is a software framework that lets one easily write and run applications
+Hadoop is a software framework that lets one easily write and run applications
 that process vast amounts of data. Here's what makes Hadoop especially useful:
 
-Finally, list all slave hostnames or IP addresses in your
-conf/slaves file, one per line. Then format your filesystem
+Finally, list all worker hostnames or IP addresses in your
+conf/workers file, one per line. Then format your filesystem
 and start your cluster on your master node, as above.
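For completeness, a hedged sketch of the environment-variable side of the rename. It assumes hadoop_deprecate_envvar only warns and maps the old name onto the new one; all values below are examples, not defaults shipped by this patch:

# New names (e.g. in hadoop-env.sh):
export HADOOP_WORKERS="${HADOOP_CONF_DIR}/workers"   # hosts file used by workers.sh, start-dfs.sh, etc.
export HADOOP_WORKER_SLEEP=1                         # seconds to sleep between spawning remote commands

# Old names still work but should now draw a deprecation warning from
# hadoop_deprecate_envvar (see the hadoop-config.sh hunk above):
#   export HADOOP_SLAVES="${HADOOP_CONF_DIR}/workers"

# File-less alternative: a space-delimited host list. Do not combine it with
# HADOOP_WORKERS, since hadoop_connect_to_hosts aborts when both are set.
#   export HADOOP_WORKER_NAMES="worker01 worker02"   # hypothetical hosts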