diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop b/hadoop-common-project/hadoop-common/src/main/bin/hadoop index b57a4c1305..450543dae7 100755 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop @@ -161,10 +161,6 @@ function hadoopcmd_case fi ;; esac - - # Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" } # This script runs the hadoop core commands. @@ -194,6 +190,8 @@ fi HADOOP_SUBCMD=$1 shift +hadoop_verify_user "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}" + HADOOP_SUBCMD_ARGS=("$@") if declare -f hadoop_subcommand_"${HADOOP_SUBCMD}" >/dev/null 2>&1; then @@ -203,15 +201,20 @@ else hadoopcmd_case "${HADOOP_SUBCMD}" "${HADOOP_SUBCMD_ARGS[@]}" fi -hadoop_verify_user "${HADOOP_SUBCMD}" +hadoop_add_client_opts if [[ ${HADOOP_WORKER_MODE} = true ]]; then hadoop_common_worker_mode_execute "${HADOOP_COMMON_HOME}/bin/hadoop" "${HADOOP_USER_PARAMS[@]}" exit $? 
fi +hadoop_subcommand_opts "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}" + if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then HADOOP_SECURE_USER="${HADOOP_SUBCMD_SECUREUSER}" + + hadoop_subcommand_secure_opts "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}" + hadoop_verify_secure_prereq hadoop_setup_secure_service priv_outfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out" diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh index 75554f07bc..b6e2b59133 100755 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh @@ -306,6 +306,13 @@ function hadoop_bootstrap HADOOP_TOOLS_DIR=${HADOOP_TOOLS_DIR:-"share/hadoop/tools"} HADOOP_TOOLS_LIB_JARS_DIR=${HADOOP_TOOLS_LIB_JARS_DIR:-"${HADOOP_TOOLS_DIR}/lib"} + # by default, whatever we are about to run doesn't support + # daemonization + HADOOP_SUBCMD_SUPPORTDAEMONIZATION=false + + # shellcheck disable=SC2034 + HADOOP_SUBCMD_SECURESERVICE=false + # usage output set to zero hadoop_reset_usage @@ -1230,6 +1237,20 @@ function hadoop_translate_cygwin_path fi } +## @description Adds the HADOOP_CLIENT_OPTS variable to +## @description HADOOP_OPTS if HADOOP_SUBCMD_SUPPORTDAEMONIZATION is false +## @audience public +## @stability stable +## @replaceable yes +function hadoop_add_client_opts +{ + if [[ "${HADOOP_SUBCMD_SUPPORTDAEMONIZATION}" = false + || -z "${HADOOP_SUBCMD_SUPPORTDAEMONIZATION}" ]]; then + hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" + fi +} + ## @description Finish configuring Hadoop specific system properties ## @description prior to executing Java ## @audience private @@ -1963,17 +1984,130 @@ function hadoop_secure_daemon_handler ## @return will exit on failure conditions function hadoop_verify_user { - local 
command=$1 - local uservar="HADOOP_${command}_USER" + declare program=$1 + declare command=$2 + declare uprogram + declare ucommand + declare uvar - if [[ -n ${!uservar} ]]; then - if [[ ${!uservar} != "${USER}" ]]; then - hadoop_error "ERROR: ${command} can only be executed by ${!uservar}." + if [[ -z "${BASH_VERSINFO[0]}" ]] \ + || [[ "${BASH_VERSINFO[0]}" -lt 4 ]]; then + uprogram=$(echo "${program}" | tr '[:lower:]' '[:upper:]') + ucommand=$(echo "${command}" | tr '[:lower:]' '[:upper:]') + else + uprogram=${program^^} + ucommand=${command^^} + fi + + uvar="${uprogram}_${ucommand}_USER" + + if [[ -n ${!uvar} ]]; then + if [[ ${!uvar} != "${USER}" ]]; then + hadoop_error "ERROR: ${command} can only be executed by ${!uvar}." exit 1 fi fi } +## @description Add custom (program)_(command)_OPTS to HADOOP_OPTS. +## @description Also handles the deprecated cases from pre-3.x. +## @audience public +## @stability stable +## @replaceable yes +## @param program +## @param subcommand +## @return will exit on failure conditions +function hadoop_subcommand_opts +{ + declare program=$1 + declare command=$2 + declare uvar + declare depvar + declare uprogram + declare ucommand + + if [[ -z "${program}" || -z "${command}" ]]; then + return 1 + fi + + # bash 4 and up have built-in ways to upper and lower + # case the contents of vars. This is faster than + # calling tr. 
+ + if [[ -z "${BASH_VERSINFO[0]}" ]] \ + || [[ "${BASH_VERSINFO[0]}" -lt 4 ]]; then + uprogram=$(echo "${program}" | tr '[:lower:]' '[:upper:]') + ucommand=$(echo "${command}" | tr '[:lower:]' '[:upper:]') + else + uprogram=${program^^} + ucommand=${command^^} + fi + + uvar="${uprogram}_${ucommand}_OPTS" + + # Let's handle all of the deprecation cases early + # HADOOP_NAMENODE_OPTS -> HDFS_NAMENODE_OPTS + + depvar="HADOOP_${ucommand}_OPTS" + + if [[ "${depvar}" != "${uvar}" ]]; then + if [[ -n "${!depvar}" ]]; then + hadoop_deprecate_envvar "${depvar}" "${uvar}" + fi + fi + + if [[ -n ${!uvar} ]]; then + hadoop_debug "Appending ${uvar} onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${!uvar}" + return 0 + fi +} + +## @description Add custom (program)_(command)_SECURE_EXTRA_OPTS to HADOOP_OPTS. +## @description This *does not* handle the pre-3.x deprecated cases +## @audience public +## @stability stable +## @replaceable yes +## @param program +## @param subcommand +## @return will exit on failure conditions +function hadoop_subcommand_secure_opts +{ + declare program=$1 + declare command=$2 + declare uvar + declare uprogram + declare ucommand + + if [[ -z "${program}" || -z "${command}" ]]; then + return 1 + fi + + # bash 4 and up have built-in ways to upper and lower + # case the contents of vars. This is faster than + # calling tr. + + if [[ -z "${BASH_VERSINFO[0]}" ]] \ + || [[ "${BASH_VERSINFO[0]}" -lt 4 ]]; then + uprogram=$(echo "${program}" | tr '[:lower:]' '[:upper:]') + ucommand=$(echo "${command}" | tr '[:lower:]' '[:upper:]') + else + uprogram=${program^^} + ucommand=${command^^} + fi + + # HDFS_DATANODE_SECURE_EXTRA_OPTS + # HDFS_NFS3_SECURE_EXTRA_OPTS + # ... 
+ uvar="${uprogram}_${ucommand}_SECURE_EXTRA_OPTS" + + if [[ -n ${!uvar} ]]; then + hadoop_debug "Appending ${uvar} onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${!uvar}" + return 0 + fi +} + ## @description Perform the 'hadoop classpath', etc subcommand with the given ## @description parameters ## @audience private diff --git a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh index f4493f1f20..a78f3f62fa 100644 --- a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh +++ b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh @@ -294,16 +294,16 @@ esac # and therefore may override any similar flags set in HADOOP_OPTS # # a) Set JMX options -# export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote=true -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=1026" +# export HDFS_NAMENODE_OPTS="-Dcom.sun.management.jmxremote=true -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=1026" # # b) Set garbage collection logs -# export HADOOP_NAMENODE_OPTS="${HADOOP_GC_SETTINGS} -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')" +# export HDFS_NAMENODE_OPTS="${HADOOP_GC_SETTINGS} -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')" # # c) ... 
or set them directly -# export HADOOP_NAMENODE_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')" +# export HDFS_NAMENODE_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')" # this is the default: -# export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS" +# export HDFS_NAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS" ### # SecondaryNameNode specific parameters @@ -313,7 +313,7 @@ esac # and therefore may override any similar flags set in HADOOP_OPTS # # This is the default: -# export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS" +# export HDFS_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS" ### # DataNode specific parameters @@ -323,7 +323,7 @@ esac # and therefore may override any similar flags set in HADOOP_OPTS # # This is the default: -# export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS" +# export HDFS_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS" # On secure datanodes, user to run the datanode as after dropping privileges. # This **MUST** be uncommented to enable secure HDFS if using privileged ports @@ -336,7 +336,7 @@ esac # Supplemental options for secure datanodes # By default, Hadoop uses jsvc which needs to know to launch a # server jvm. -# export HADOOP_DN_SECURE_EXTRA_OPTS="-jvm server" +# export HDFS_DATANODE_SECURE_EXTRA_OPTS="-jvm server" # Where datanode log files are stored in the secure data environment. # This will replace the hadoop.log.dir Java property in secure mode. @@ -352,18 +352,18 @@ esac # These options will be appended to the options specified as HADOOP_OPTS # and therefore may override any similar flags set in HADOOP_OPTS # -# export HADOOP_NFS3_OPTS="" +# export HDFS_NFS3_OPTS="" # Specify the JVM options to be used when starting the Hadoop portmapper. 
# These options will be appended to the options specified as HADOOP_OPTS # and therefore may override any similar flags set in HADOOP_OPTS # -# export HADOOP_PORTMAP_OPTS="-Xmx512m" +# export HDFS_PORTMAP_OPTS="-Xmx512m" # Supplemental options for priviliged gateways # By default, Hadoop uses jsvc which needs to know to launch a # server jvm. -# export HADOOP_NFS3_SECURE_EXTRA_OPTS="-jvm server" +# export HDFS_NFS3_SECURE_EXTRA_OPTS="-jvm server" # On privileged gateways, user to run the gateway as after dropping privileges # This will replace the hadoop.id.str Java property in secure mode. @@ -376,7 +376,7 @@ esac # These options will be appended to the options specified as HADOOP_OPTS # and therefore may override any similar flags set in HADOOP_OPTS # -# export HADOOP_ZKFC_OPTS="" +# export HDFS_ZKFC_OPTS="" ### # QuorumJournalNode specific parameters @@ -385,7 +385,7 @@ esac # These options will be appended to the options specified as HADOOP_OPTS # and therefore may override any similar flags set in HADOOP_OPTS # -# export HADOOP_JOURNALNODE_OPTS="" +# export HDFS_JOURNALNODE_OPTS="" ### # HDFS Balancer specific parameters @@ -394,7 +394,7 @@ esac # These options will be appended to the options specified as HADOOP_OPTS # and therefore may override any similar flags set in HADOOP_OPTS # -# export HADOOP_BALANCER_OPTS="" +# export HDFS_BALANCER_OPTS="" ### # HDFS Mover specific parameters @@ -403,7 +403,7 @@ esac # These options will be appended to the options specified as HADOOP_OPTS # and therefore may override any similar flags set in HADOOP_OPTS # -# export HADOOP_MOVER_OPTS="" +# export HDFS_MOVER_OPTS="" ### # Advanced Users Only! @@ -417,6 +417,7 @@ esac # # To prevent accidents, shell commands be (superficially) locked # to only allow certain users to execute certain subcommands. +# It uses the format of (command)_(subcommand)_USER. 
# # For example, to limit who can execute the namenode command, -# export HADOOP_namenode_USER=hdfs +# export HDFS_NAMENODE_USER=hdfs diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/ClusterSetup.md b/hadoop-common-project/hadoop-common/src/site/markdown/ClusterSetup.md index 0d551b1c2e..f2227690e6 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/ClusterSetup.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/ClusterSetup.md @@ -64,17 +64,17 @@ Administrators can configure individual daemons using the configuration options | Daemon | Environment Variable | |:---- |:---- | -| NameNode | HADOOP\_NAMENODE\_OPTS | -| DataNode | HADOOP\_DATANODE\_OPTS | -| Secondary NameNode | HADOOP\_SECONDARYNAMENODE\_OPTS | +| NameNode | HDFS\_NAMENODE\_OPTS | +| DataNode | HDFS\_DATANODE\_OPTS | +| Secondary NameNode | HDFS\_SECONDARYNAMENODE\_OPTS | | ResourceManager | YARN\_RESOURCEMANAGER\_OPTS | | NodeManager | YARN\_NODEMANAGER\_OPTS | | WebAppProxy | YARN\_PROXYSERVER\_OPTS | -| Map Reduce Job History Server | HADOOP\_JOB\_HISTORYSERVER\_OPTS | +| Map Reduce Job History Server | MAPRED\_HISTORYSERVER\_OPTS | -For example, To configure Namenode to use parallelGC, the following statement should be added in hadoop-env.sh : +For example, To configure Namenode to use parallelGC and a 4GB Java Heap, the following statement should be added in hadoop-env.sh : - export HADOOP_NAMENODE_OPTS="-XX:+UseParallelGC" + export HDFS_NAMENODE_OPTS="-XX:+UseParallelGC -Xmx4g" See `etc/hadoop/hadoop-env.sh` for other examples. 
@@ -91,13 +91,6 @@ It is also traditional to configure `HADOOP_HOME` in the system-wide shell envir HADOOP_HOME=/path/to/hadoop export HADOOP_HOME -| Daemon | Environment Variable | -|:---- |:---- | -| ResourceManager | YARN\_RESOURCEMANAGER\_HEAPSIZE | -| NodeManager | YARN\_NODEMANAGER\_HEAPSIZE | -| WebAppProxy | YARN\_PROXYSERVER\_HEAPSIZE | -| Map Reduce Job History Server | HADOOP\_JOB\_HISTORYSERVER\_HEAPSIZE | - ### Configuring the Hadoop Daemons This section deals with important parameters to be specified in the given configuration files: diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md b/hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md index 940627dd52..b130f0f8e7 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md @@ -24,7 +24,7 @@ Apache Hadoop has many environment variables that control various aspects of the ### `HADOOP_CLIENT_OPTS` -This environment variable is used for almost all end-user operations. It can be used to set any Java options as well as any Apache Hadoop options via a system property definition. For example: +This environment variable is used for all end-user, non-daemon operations. It can be used to set any Java options as well as any Apache Hadoop options via a system property definition. For example: ```bash HADOOP_CLIENT_OPTS="-Xmx1g -Dhadoop.socks.server=localhost:4000" hadoop fs -ls /tmp @@ -32,6 +32,18 @@ HADOOP_CLIENT_OPTS="-Xmx1g -Dhadoop.socks.server=localhost:4000" hadoop fs -ls / will increase the memory and send this command via a SOCKS proxy server. +### `(command)_(subcommand)_OPTS` + +It is also possible to set options on a per subcommand basis. This allows for one to create special options for particular cases. The first part of the pattern is the command being used, but all uppercase. The second part of the command is the subcommand being used. 
Then finally followed by the string `_OPTS`. + +For example, to configure `mapred distcp` to use a 2GB heap, one would use: + +```bash +MAPRED_DISTCP_OPTS="-Xmx2g" +``` + +These options will appear *after* `HADOOP_CLIENT_OPTS` during execution and will generally take precedence. + ### `HADOOP_CLASSPATH` NOTE: Site-wide settings should be configured via a shellprofile entry and permanent user-wide settings should be configured via ${HOME}/.hadooprc using the `hadoop_add_classpath` function. See below for more information. @@ -56,6 +68,8 @@ For example: # HADOOP_CLIENT_OPTS="-Xmx1g" +MAPRED_DISTCP_OPTS="-Xmx2g" +HADOOP_DISTCP_OPTS="-Xmx2g" ``` The `.hadoop-env` file can also be used to extend functionality and teach Apache Hadoop new tricks. For example, to run hadoop commands accessing the server referenced in the environment variable `${HADOOP_SERVER}`, the following in the `.hadoop-env` will do just that: @@ -71,11 +85,23 @@ One word of warning: not all of Unix Shell API routines are available or work c ## Administrator Environment -There are many environment variables that impact how the system operates. By far, the most important are the series of `_OPTS` variables that control how daemons work. These variables should contain all of the relevant settings for those daemons. +In addition to the various XML files, there are two key capabilities for administrators to configure Apache Hadoop when using the Unix Shell: -More, detailed information is contained in `hadoop-env.sh` and the other env.sh files. + * Many environment variables that impact how the system operates. This guide will only highlight some key ones. There is generally more information in the various `*-env.sh` files. -Advanced administrators may wish to supplement or do some platform-specific fixes to the existing scripts. In some systems, this means copying the errant script or creating a custom build with these changes. 
Apache Hadoop provides the capabilities to do function overrides so that the existing code base may be changed in place without all of that work. Replacing functions is covered later under the Shell API documentation. + * Supplement or do some platform-specific changes to the existing scripts. Apache Hadoop provides the capabilities to do function overrides so that the existing code base may be changed in place without all of that work. Replacing functions is covered later under the Shell API documentation. + +### `(command)_(subcommand)_OPTS` + +By far, the most important are the series of `_OPTS` variables that control how daemons work. These variables should contain all of the relevant settings for those daemons. + +Similar to the user commands above, all daemons will honor the `(command)_(subcommand)_OPTS` pattern. It is generally recommended that these be set in `hadoop-env.sh` to guarantee that the system will know which settings it should use on restart. Unlike user-facing subcommands, daemons will *NOT* honor `HADOOP_CLIENT_OPTS`. + +In addition, daemons that run in an extra security mode also support `(command)_(subcommand)_SECURE_EXTRA_OPTS`. These options are *supplemental* to the generic `*_OPTS` and will appear after, therefore generally taking precedence. + +### `(command)_(subcommand)_USER` + +Apache Hadoop provides a way to do a user check per-subcommand. While this method is easily circumvented and should not be considered a security feature, it does provide a mechanism by which to prevent accidents. For example, setting `HDFS_NAMENODE_USER=hdfs` will make the `hdfs namenode` and `hdfs --daemon start namenode` commands verify that the user running the commands is the hdfs user by checking the `USER` environment variable. This also works for non-daemons. Setting `HADOOP_DISTCP_USER=jane` will verify that `USER` is set to `jane` before being allowed to execute the `hadoop distcp` command. 
## Developer and Advanced Administrator Environment diff --git a/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_add_client_opts.bats b/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_add_client_opts.bats new file mode 100644 index 0000000000..bc5051d463 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_add_client_opts.bats @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +load hadoop-functions_test_helper + +@test "hadoop_subcommand_opts (daemonization false)" { + HADOOP_OPTS="1" + HADOOP_CLIENT_OPTS="2" + HADOOP_SUBCMD_SUPPORTDAEMONIZATION="false" + hadoop_add_client_opts + [ "${HADOOP_OPTS}" = "1 2" ] +} + +@test "hadoop_subcommand_opts (daemonization true)" { + HADOOP_OPTS="1" + HADOOP_CLIENT_OPTS="2" + HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" + hadoop_add_client_opts + [ "${HADOOP_OPTS}" = "1" ] +} + +@test "hadoop_subcommand_opts (daemonization empty)" { + HADOOP_OPTS="1" + HADOOP_CLIENT_OPTS="2" + unset HADOOP_SUBCMD_SUPPORTDAEMONIZATION + hadoop_add_client_opts + [ "${HADOOP_OPTS}" = "1 2" ] +} diff --git a/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_subcommand_opts.bats b/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_subcommand_opts.bats new file mode 100644 index 0000000000..0010a58748 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_subcommand_opts.bats @@ -0,0 +1,68 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +load hadoop-functions_test_helper + +@test "hadoop_subcommand_opts (missing param)" { + HADOOP_OPTS="x" + run hadoop_subcommand_opts testvar + [ "${status}" = "1" ] +} + +@test "hadoop_subcommand_opts (simple not exist)" { + HADOOP_OPTS="x" + hadoop_subcommand_opts hadoop subcommand + [ "${HADOOP_OPTS}" = "x" ] +} + +@test "hadoop_subcommand_opts (hadoop simple exist)" { + HADOOP_OPTS="x" + HADOOP_TEST_OPTS="y" + hadoop_subcommand_opts hadoop test + echo "${HADOOP_OPTS}" + [ "${HADOOP_OPTS}" = "x y" ] +} + +@test "hadoop_subcommand_opts (hadoop complex exist)" { + HADOOP_OPTS="x" + HADOOP_TEST_OPTS="y z" + hadoop_subcommand_opts hadoop test + echo "${HADOOP_OPTS}" + [ "${HADOOP_OPTS}" = "x y z" ] +} + +@test "hadoop_subcommand_opts (hdfs simple exist)" { + HADOOP_OPTS="x" + HDFS_TEST_OPTS="y" + hadoop_subcommand_opts hdfs test + echo "${HADOOP_OPTS}" + [ "${HADOOP_OPTS}" = "x y" ] +} + +@test "hadoop_subcommand_opts (yarn simple exist)" { + HADOOP_OPTS="x" + YARN_TEST_OPTS="y" + hadoop_subcommand_opts yarn test + echo "${HADOOP_OPTS}" + [ "${HADOOP_OPTS}" = "x y" ] +} + +@test "hadoop_subcommand_opts (deprecation case)" { + HADOOP_OPTS="x" + HADOOP_NAMENODE_OPTS="y" + hadoop_subcommand_opts hdfs namenode + echo "${HADOOP_OPTS}" + [ "${HADOOP_OPTS}" = "x y" ] +} diff --git a/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_subcommand_secure_opts.bats b/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_subcommand_secure_opts.bats new file mode 100644 index 0000000000..1b3506ccf5 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_subcommand_secure_opts.bats @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load hadoop-functions_test_helper + +@test "hadoop_subcommand_secure_opts (missing param)" { + HADOOP_OPTS="x" + run hadoop_subcommand_secure_opts testvar + [ "${status}" = "1" ] +} + +@test "hadoop_subcommand_secure_opts (simple not exist)" { + HADOOP_OPTS="x" + hadoop_subcommand_secure_opts hadoop subcommand + [ "${HADOOP_OPTS}" = "x" ] +} + +@test "hadoop_subcommand_secure_opts (hadoop simple exist)" { + HADOOP_OPTS="x" + HADOOP_TEST_SECURE_EXTRA_OPTS="y" + hadoop_subcommand_secure_opts hadoop test + echo "${HADOOP_OPTS}" + [ "${HADOOP_OPTS}" = "x y" ] +} + +@test "hadoop_subcommand_secure_opts (hadoop complex exist)" { + HADOOP_OPTS="x" + HADOOP_TEST_SECURE_EXTRA_OPTS="y z" + hadoop_subcommand_secure_opts hadoop test + echo "${HADOOP_OPTS}" + [ "${HADOOP_OPTS}" = "x y z" ] +} + +@test "hadoop_subcommand_secure_opts (hdfs simple exist)" { + HADOOP_OPTS="x" + HDFS_TEST_SECURE_EXTRA_OPTS="y" + hadoop_subcommand_secure_opts hdfs test + echo "${HADOOP_OPTS}" + [ "${HADOOP_OPTS}" = "x y" ] +} diff --git a/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_verify_user.bats b/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_verify_user.bats new file mode 100644 index 0000000000..ac9fa9f659 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_verify_user.bats @@ -0,0 +1,53 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# 
contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load hadoop-functions_test_helper + +@test "hadoop_verify_user (hadoop: no setting)" { + run hadoop_verify_user hadoop test + [ "${status}" = "0" ] +} + +@test "hadoop_verify_user (yarn: no setting)" { + run hadoop_verify_user yarn test + [ "${status}" = "0" ] +} + +@test "hadoop_verify_user (hadoop: allow)" { + HADOOP_TEST_USER=${USER} + run hadoop_verify_user hadoop test + [ "${status}" = "0" ] +} + +@test "hadoop_verify_user (yarn: allow)" { + YARN_TEST_USER=${USER} + run hadoop_verify_user yarn test + [ "${status}" = "0" ] +} + +# colon isn't a valid username, so let's use it +# this should fail regardless of who the user is +# that is running the test code +@test "hadoop_verify_user (hadoop: disallow)" { + HADOOP_TEST_USER=: + run hadoop_verify_user hadoop test + [ "${status}" = "1" ] +} + +@test "hadoop_verify_user (yarn: disallow)" { + YARN_TEST_USER=: + run hadoop_verify_user yarn test + [ "${status}" = "1" ] +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs index 7a90f08a89..6d6088fc62 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs @@ -79,8 +79,6 @@ function hdfscmd_case balancer) 
HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" HADOOP_CLASSNAME=org.apache.hadoop.hdfs.server.balancer.Balancer - hadoop_debug "Appending HADOOP_BALANCER_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_BALANCER_OPTS}" ;; cacheadmin) HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.CacheAdmin @@ -103,13 +101,8 @@ function hdfscmd_case HADOOP_SECURE_PID_DIR="${HADOOP_SECURE_PID_DIR:-$HADOOP_SECURE_DN_PID_DIR}" HADOOP_SECURE_LOG_DIR="${HADOOP_SECURE_LOG_DIR:-$HADOOP_SECURE_DN_LOG_DIR}" - hadoop_debug "Appending HADOOP_DATANODE_OPTS onto HADOOP_OPTS" - hadoop_debug "Appending HADOOP_DN_SECURE_EXTRA_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_DATANODE_OPTS} ${HADOOP_DN_SECURE_EXTRA_OPTS}" HADOOP_CLASSNAME="org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter" else - hadoop_debug "Appending HADOOP_DATANODE_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_DATANODE_OPTS}" HADOOP_CLASSNAME='org.apache.hadoop.hdfs.server.datanode.DataNode' fi ;; @@ -118,18 +111,12 @@ function hdfscmd_case ;; dfs) HADOOP_CLASSNAME=org.apache.hadoop.fs.FsShell - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" ;; dfsadmin) HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.DFSAdmin - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" ;; diskbalancer) HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.DiskBalancerCLI - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" ;; envvars) echo "JAVA_HOME='${JAVA_HOME}'" @@ -144,16 +131,12 @@ function hdfscmd_case ;; erasurecode) HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.erasurecode.ECCli - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" ;; fetchdt) HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.DelegationTokenFetcher ;; fsck) 
HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.DFSck - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" ;; getconf) HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.GetConf @@ -163,14 +146,10 @@ function hdfscmd_case ;; haadmin) HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.DFSHAAdmin - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" ;; journalnode) HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" HADOOP_CLASSNAME='org.apache.hadoop.hdfs.qjournal.server.JournalNode' - hadoop_debug "Appending HADOOP_JOURNALNODE_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_JOURNALNODE_OPTS}" ;; jmxget) HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.JMXGet @@ -181,14 +160,10 @@ function hdfscmd_case mover) HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" HADOOP_CLASSNAME=org.apache.hadoop.hdfs.server.mover.Mover - hadoop_debug "Appending HADOOP_MOVER_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_MOVER_OPTS}" ;; namenode) HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" HADOOP_CLASSNAME='org.apache.hadoop.hdfs.server.namenode.NameNode' - hadoop_debug "Appending HADOOP_NAMENODE_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NAMENODE_OPTS}" hadoop_add_param HADOOP_OPTS hdfs.audit.logger "-Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER}" ;; nfs3) @@ -201,13 +176,8 @@ function hdfscmd_case HADOOP_SECURE_PID_DIR="${HADOOP_SECURE_PID_DIR:-$HADOOP_SECURE_NFS3_PID_DIR}" HADOOP_SECURE_LOG_DIR="${HADOOP_SECURE_LOG_DIR:-$HADOOP_SECURE_NFS3_LOG_DIR}" - hadoop_debug "Appending HADOOP_NFS3_OPTS onto HADOOP_OPTS" - hadoop_debug "Appending HADOOP_NFS3_SECURE_EXTRA_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NFS3_OPTS} ${HADOOP_NFS3_SECURE_EXTRA_OPTS}" HADOOP_CLASSNAME=org.apache.hadoop.hdfs.nfs.nfs3.PrivilegedNfsGatewayStarter else - hadoop_debug "Appending HADOOP_NFS3_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NFS3_OPTS}" 
HADOOP_CLASSNAME=org.apache.hadoop.hdfs.nfs.nfs3.Nfs3 fi ;; @@ -223,14 +193,10 @@ function hdfscmd_case portmap) HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" HADOOP_CLASSNAME=org.apache.hadoop.portmap.Portmap - hadoop_debug "Appending HADOOP_PORTMAP_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_PORTMAP_OPTS}" ;; secondarynamenode) HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" HADOOP_CLASSNAME='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode' - hadoop_debug "Appending HADOOP_SECONDARYNAMENODE_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_SECONDARYNAMENODE_OPTS}" hadoop_add_param HADOOP_OPTS hdfs.audit.logger "-Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER}" ;; snapshotDiff) @@ -245,8 +211,6 @@ function hdfscmd_case zkfc) HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" HADOOP_CLASSNAME='org.apache.hadoop.hdfs.tools.DFSZKFailoverController' - hadoop_debug "Appending HADOOP_ZKFC_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_ZKFC_OPTS}" ;; *) HADOOP_CLASSNAME="${subcmd}" @@ -282,6 +246,8 @@ fi HADOOP_SUBCMD=$1 shift +hadoop_verify_user "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}" + HADOOP_SUBCMD_ARGS=("$@") if declare -f hdfs_subcommand_"${HADOOP_SUBCMD}" >/dev/null 2>&1; then @@ -291,15 +257,20 @@ else hdfscmd_case "${HADOOP_SUBCMD}" "${HADOOP_SUBCMD_ARGS[@]}" fi -hadoop_verify_user "${HADOOP_SUBCMD}" +hadoop_add_client_opts if [[ ${HADOOP_WORKER_MODE} = true ]]; then hadoop_common_worker_mode_execute "${HADOOP_HDFS_HOME}/bin/hdfs" "${HADOOP_USER_PARAMS[@]}" exit $? 
fi +hadoop_subcommand_opts "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}" + if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then HADOOP_SECURE_USER="${HADOOP_SUBCMD_SECUREUSER}" + + hadoop_subcommand_secure_opts "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}" + hadoop_verify_secure_prereq hadoop_setup_secure_service priv_outfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out" diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh old mode 100644 new mode 100755 index d440210992..cba37a4495 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh @@ -26,7 +26,7 @@ function hadoop_subproject_init export HADOOP_HDFS_ENV_PROCESSED=true fi fi - + # at some point in time, someone thought it would be a good idea to # create separate vars for every subproject. *sigh* # let's perform some overrides and setup some defaults for bw compat @@ -42,23 +42,31 @@ function hadoop_subproject_init hadoop_deprecate_envvar HADOOP_HDFS_NICENESS HADOOP_NICENESS hadoop_deprecate_envvar HADOOP_HDFS_STOP_TIMEOUT HADOOP_STOP_TIMEOUT - + hadoop_deprecate_envvar HADOOP_HDFS_PID_DIR HADOOP_PID_DIR hadoop_deprecate_envvar HADOOP_HDFS_ROOT_LOGGER HADOOP_ROOT_LOGGER hadoop_deprecate_envvar HADOOP_HDFS_IDENT_STRING HADOOP_IDENT_STRING - + + hadoop_deprecate_envvar HADOOP_DN_SECURE_EXTRA_OPTS HDFS_DATANODE_SECURE_EXTRA_OPTS + + hadoop_deprecate_envvar HADOOP_NFS3_SECURE_EXTRA_OPTS HDFS_NFS3_SECURE_EXTRA_OPTS + + HADOOP_HDFS_HOME="${HADOOP_HDFS_HOME:-$HADOOP_HOME}" - + # turn on the defaults export HDFS_AUDIT_LOGGER=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} - export HADOOP_NAMENODE_OPTS=${HADOOP_NAMENODE_OPTS:-"-Dhadoop.security.logger=INFO,RFAS"} - export HADOOP_SECONDARYNAMENODE_OPTS=${HADOOP_SECONDARYNAMENODE_OPTS:-"-Dhadoop.security.logger=INFO,RFAS"} - export 
HADOOP_DATANODE_OPTS=${HADOOP_DATANODE_OPTS:-"-Dhadoop.security.logger=ERROR,RFAS"} - export HADOOP_DN_SECURE_EXTRA_OPTS=${HADOOP_DN_SECURE_EXTRA_OPTS:-"-jvm server"} - export HADOOP_NFS3_SECURE_EXTRA_OPTS=${HADOOP_NFS3_SECURE_EXTRA_OPTS:-"-jvm server"} - export HADOOP_PORTMAP_OPTS=${HADOOP_PORTMAP_OPTS:-"-Xmx512m"} + export HDFS_NAMENODE_OPTS=${HDFS_NAMENODE_OPTS:-"-Dhadoop.security.logger=INFO,RFAS"} + export HDFS_SECONDARYNAMENODE_OPTS=${HDFS_SECONDARYNAMENODE_OPTS:-"-Dhadoop.security.logger=INFO,RFAS"} + export HDFS_DATANODE_OPTS=${HDFS_DATANODE_OPTS:-"-Dhadoop.security.logger=ERROR,RFAS"} + export HDFS_PORTMAP_OPTS=${HDFS_PORTMAP_OPTS:-"-Xmx512m"} + + # depending upon what is being used to start Java, these may need to be + # set empty. (thus no colon) + export HDFS_DATANODE_SECURE_EXTRA_OPTS=${HDFS_DATANODE_SECURE_EXTRA_OPTS-"-jvm server"} + export HDFS_NFS3_SECURE_EXTRA_OPTS=${HDFS_NFS3_SECURE_EXTRA_OPTS-"-jvm server"} } if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HdfsNfsGateway.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HdfsNfsGateway.md index ddb4f01efe..37a2042d08 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HdfsNfsGateway.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HdfsNfsGateway.md @@ -183,7 +183,7 @@ It's strongly recommended for the users to update a few configuration properties * JVM and log settings. You can export JVM settings (e.g., heap size and GC log) in - HADOOP\_NFS3\_OPTS. More NFS related settings can be found in hadoop-env.sh. + HDFS\_NFS3\_OPTS. More NFS related settings can be found in hadoop-env.sh. To get NFS debug trace, you can edit the log4j.property file to add the following. Note, debug trace, especially for ONCRPC, can be very verbose. 
diff --git a/hadoop-mapreduce-project/bin/mapred b/hadoop-mapreduce-project/bin/mapred index 046d48c4cf..fe7c56ae08 100755 --- a/hadoop-mapreduce-project/bin/mapred +++ b/hadoop-mapreduce-project/bin/mapred @@ -69,8 +69,6 @@ function mapredcmd_case historyserver) HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" HADOOP_CLASSNAME=org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer - hadoop_debug "Appending HADOOP_JOB_HISTORYSERVER_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_JOB_HISTORYSERVER_OPTS}" if [ -n "${HADOOP_JOB_HISTORYSERVER_HEAPSIZE}" ]; then # shellcheck disable=SC2034 HADOOP_HEAPSIZE_MAX="${HADOOP_JOB_HISTORYSERVER_HEAPSIZE}" @@ -79,31 +77,21 @@ function mapredcmd_case ;; hsadmin) HADOOP_CLASSNAME=org.apache.hadoop.mapreduce.v2.hs.client.HSAdmin - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" ;; job) HADOOP_CLASSNAME=org.apache.hadoop.mapred.JobClient - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" ;; pipes) HADOOP_CLASSNAME=org.apache.hadoop.mapred.pipes.Submitter - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" ;; queue) HADOOP_CLASSNAME=org.apache.hadoop.mapred.JobQueueClient ;; sampler) HADOOP_CLASSNAME=org.apache.hadoop.mapred.lib.InputSampler - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" ;; version) HADOOP_CLASSNAME=org.apache.hadoop.util.VersionInfo - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" ;; *) HADOOP_CLASSNAME="${subcmd}" @@ -141,6 +129,8 @@ fi HADOOP_SUBCMD=$1 shift +hadoop_verify_user "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}" + HADOOP_SUBCMD_ARGS=("$@") if declare -f mapred_subcommand_"${HADOOP_SUBCMD}" >/dev/null 2>&1; then @@ -150,15 +140,20 @@ else mapredcmd_case "${HADOOP_SUBCMD}" 
"${HADOOP_SUBCMD_ARGS[@]}" fi -hadoop_verify_user "${HADOOP_SUBCMD}" +hadoop_add_client_opts -if [[ ${HADOOP_SLAVE_MODE} = true ]]; then - hadoop_common_slave_mode_execute "${HADOOP_MAPRED_HOME}/bin/mapred" "${HADOOP_USER_PARAMS[@]}" +if [[ ${HADOOP_WORKER_MODE} = true ]]; then + hadoop_common_worker_mode_execute "${HADOOP_MAPRED_HOME}/bin/mapred" "${HADOOP_USER_PARAMS[@]}" exit $? fi +hadoop_subcommand_opts "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}" + if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then HADOOP_SECURE_USER="${HADOOP_SUBCMD_SECUREUSER}" + + hadoop_subcommand_secure_opts "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}" + hadoop_verify_secure_prereq hadoop_setup_secure_service priv_outfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out" diff --git a/hadoop-mapreduce-project/bin/mapred-config.sh b/hadoop-mapreduce-project/bin/mapred-config.sh old mode 100644 new mode 100755 index a9897929c5..68d3463de6 --- a/hadoop-mapreduce-project/bin/mapred-config.sh +++ b/hadoop-mapreduce-project/bin/mapred-config.sh @@ -26,7 +26,7 @@ function hadoop_subproject_init export HADOOP_MAPRED_ENV_PROCESSED=true fi fi - + # at some point in time, someone thought it would be a good idea to # create separate vars for every subproject. 
*sigh* # let's perform some overrides and setup some defaults for bw compat @@ -38,15 +38,17 @@ function hadoop_subproject_init hadoop_deprecate_envvar HADOOP_MAPRED_LOG_DIR HADOOP_LOG_DIR hadoop_deprecate_envvar HADOOP_MAPRED_LOGFILE HADOOP_LOGFILE - + hadoop_deprecate_envvar HADOOP_MAPRED_NICENESS HADOOP_NICENESS - + hadoop_deprecate_envvar HADOOP_MAPRED_STOP_TIMEOUT HADOOP_STOP_TIMEOUT - + hadoop_deprecate_envvar HADOOP_MAPRED_PID_DIR HADOOP_PID_DIR hadoop_deprecate_envvar HADOOP_MAPRED_ROOT_LOGGER HADOOP_ROOT_LOGGER + hadoop_deprecate_envvar HADOOP_JOB_HISTORY_OPTS MAPRED_HISTORYSERVER_OPTS + HADOOP_MAPRED_HOME="${HADOOP_MAPRED_HOME:-$HADOOP_HOME}" hadoop_deprecate_envvar HADOOP_MAPRED_IDENT_STRING HADOOP_IDENT_STRING diff --git a/hadoop-mapreduce-project/conf/mapred-env.sh b/hadoop-mapreduce-project/conf/mapred-env.sh index bbe4a4980c..53bc2964c3 100644 --- a/hadoop-mapreduce-project/conf/mapred-env.sh +++ b/hadoop-mapreduce-project/conf/mapred-env.sh @@ -31,14 +31,14 @@ # Specify the max heapsize for the JobHistoryServer. If no units are # given, it will be assumed to be in MB. # This value will be overridden by an Xmx setting specified in HADOOP_OPTS, -# and/or HADOOP_JOB_HISTORYSERVER_OPTS. +# and/or MAPRED_HISTORYSERVER_OPTS. # Default is the same as HADOOP_HEAPSIZE_MAX. #export HADOOP_JOB_HISTORYSERVER_HEAPSIZE= # Specify the JVM options to be used when starting the HistoryServer. 
# These options will be appended to the options specified as HADOOP_OPTS # and therefore may override any similar flags set in HADOOP_OPTS -#export HADOOP_JOB_HISTORYSERVER_OPTS= +#export MAPRED_HISTORYSERVER_OPTS= # Specify the log4j settings for the JobHistoryServer # Java property: hadoop.root.logger diff --git a/hadoop-tools/hadoop-archive-logs/src/main/shellprofile.d/hadoop-archive-logs.sh b/hadoop-tools/hadoop-archive-logs/src/main/shellprofile.d/hadoop-archive-logs.sh index ae7b6c67d1..c889816a12 100755 --- a/hadoop-tools/hadoop-archive-logs/src/main/shellprofile.d/hadoop-archive-logs.sh +++ b/hadoop-tools/hadoop-archive-logs/src/main/shellprofile.d/hadoop-archive-logs.sh @@ -32,8 +32,6 @@ function mapred_subcommand_archive-logs # shellcheck disable=SC2034 HADOOP_CLASSNAME=org.apache.hadoop.tools.HadoopArchiveLogs hadoop_add_to_classpath_tools hadoop-archive-logs - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" } fi diff --git a/hadoop-tools/hadoop-distcp/src/main/shellprofile.d/hadoop-distcp.sh b/hadoop-tools/hadoop-distcp/src/main/shellprofile.d/hadoop-distcp.sh index 0178c54e91..6e93ec1787 100755 --- a/hadoop-tools/hadoop-distcp/src/main/shellprofile.d/hadoop-distcp.sh +++ b/hadoop-tools/hadoop-distcp/src/main/shellprofile.d/hadoop-distcp.sh @@ -32,8 +32,6 @@ function hadoop_subcommand_distcp # shellcheck disable=SC2034 HADOOP_CLASSNAME=org.apache.hadoop.tools.DistCp hadoop_add_to_classpath_tools hadoop-distcp - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" } fi @@ -55,8 +53,6 @@ function mapred_subcommand_distcp # shellcheck disable=SC2034 HADOOP_CLASSNAME=org.apache.hadoop.tools.DistCp hadoop_add_to_classpath_tools hadoop-distcp - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" } fi diff --git 
a/hadoop-tools/hadoop-extras/src/main/shellprofile.d/hadoop-extras.sh b/hadoop-tools/hadoop-extras/src/main/shellprofile.d/hadoop-extras.sh index 829d406292..1ce9aeee98 100755 --- a/hadoop-tools/hadoop-extras/src/main/shellprofile.d/hadoop-extras.sh +++ b/hadoop-tools/hadoop-extras/src/main/shellprofile.d/hadoop-extras.sh @@ -32,8 +32,6 @@ function hadoop_subcommand_distch # shellcheck disable=SC2034 HADOOP_CLASSNAME=org.apache.hadoop.tools.DistCh hadoop_add_to_classpath_tools hadoop-extras - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" } fi diff --git a/hadoop-tools/hadoop-rumen/src/main/shellprofile.d/hadoop-rumen.sh b/hadoop-tools/hadoop-rumen/src/main/shellprofile.d/hadoop-rumen.sh index d7d4022453..77023ff888 100755 --- a/hadoop-tools/hadoop-rumen/src/main/shellprofile.d/hadoop-rumen.sh +++ b/hadoop-tools/hadoop-rumen/src/main/shellprofile.d/hadoop-rumen.sh @@ -30,8 +30,6 @@ function hadoop_subcommand_rumenfolder # shellcheck disable=SC2034 HADOOP_CLASSNAME=org.apache.hadoop.tools.rumen.Folder hadoop_add_to_classpath_tools hadoop-rumen - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" } fi @@ -51,8 +49,6 @@ function hadoop_subcommand_rumentrace # shellcheck disable=SC2034 HADOOP_CLASSNAME=org.apache.hadoop.tools.rumen.TraceBuilder hadoop_add_to_classpath_tools hadoop-rumen - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" } fi diff --git a/hadoop-tools/hadoop-sls/src/main/bin/rumen2sls.sh b/hadoop-tools/hadoop-sls/src/main/bin/rumen2sls.sh index 0bd291bb8f..565dfe6f5c 100644 --- a/hadoop-tools/hadoop-sls/src/main/bin/rumen2sls.sh +++ b/hadoop-tools/hadoop-sls/src/main/bin/rumen2sls.sh @@ -68,8 +68,7 @@ function run_sls_generator() hadoop_add_param args -outputJobs "-outputJobs ${outputdir}/${outputprefix}-jobs.json" hadoop_add_param args -outputNodes 
"-outputNodes ${outputdir}/${outputprefix}-nodes.json" - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" + hadoop_add_client_opts hadoop_finalize # shellcheck disable=SC2086 diff --git a/hadoop-tools/hadoop-sls/src/main/bin/slsrun.sh b/hadoop-tools/hadoop-sls/src/main/bin/slsrun.sh index 403c4bb05f..218dee43fc 100644 --- a/hadoop-tools/hadoop-sls/src/main/bin/slsrun.sh +++ b/hadoop-tools/hadoop-sls/src/main/bin/slsrun.sh @@ -96,8 +96,7 @@ function run_simulation() { hadoop_add_param args -printsimulation "-printsimulation" fi - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" + hadoop_add_client_opts hadoop_finalize # shellcheck disable=SC2086 diff --git a/hadoop-tools/hadoop-streaming/src/main/shellprofile.d/hadoop-streaming.sh b/hadoop-tools/hadoop-streaming/src/main/shellprofile.d/hadoop-streaming.sh index cca016d860..c3010ffce0 100755 --- a/hadoop-tools/hadoop-streaming/src/main/shellprofile.d/hadoop-streaming.sh +++ b/hadoop-tools/hadoop-streaming/src/main/shellprofile.d/hadoop-streaming.sh @@ -46,10 +46,6 @@ function mapred_subcommand_streaming done IFS=${oldifs} - - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" - } fi diff --git a/hadoop-yarn-project/hadoop-yarn/bin/yarn b/hadoop-yarn-project/hadoop-yarn/bin/yarn index bd91633368..804fd1a77f 100755 --- a/hadoop-yarn-project/hadoop-yarn/bin/yarn +++ b/hadoop-yarn-project/hadoop-yarn/bin/yarn @@ -68,8 +68,6 @@ function yarncmd_case case ${subcmd} in application|applicationattempt|container) HADOOP_CLASSNAME=org.apache.hadoop.yarn.client.cli.ApplicationCLI - hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}" set -- "${subcmd}" "$@" HADOOP_SUBCMD_ARGS=("$@") ;; @@ -78,13 +76,9 @@ function yarncmd_case ;; cluster) 
HADOOP_CLASSNAME=org.apache.hadoop.yarn.client.cli.ClusterCLI - hadoop_debug "Append YARN_CLIENT_OPTS onto YARN_OPTS" - YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}" ;; daemonlog) HADOOP_CLASSNAME=org.apache.hadoop.log.LogLevel - hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}" ;; envvars) echo "JAVA_HOME='${JAVA_HOME}'" @@ -99,8 +93,6 @@ function yarncmd_case ;; jar) HADOOP_CLASSNAME=org.apache.hadoop.util.RunJar - hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}" ;; historyserver) HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" @@ -111,19 +103,13 @@ function yarncmd_case ;; logs) HADOOP_CLASSNAME=org.apache.hadoop.yarn.client.cli.LogsCLI - hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}" ;; node) HADOOP_CLASSNAME=org.apache.hadoop.yarn.client.cli.NodeCLI - hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}" ;; nodemanager) HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" HADOOP_CLASSNAME='org.apache.hadoop.yarn.server.nodemanager.NodeManager' - hadoop_debug "Append YARN_NODEMANAGER_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_NODEMANAGER_OPTS}" # Backwards compatibility if [[ -n "${YARN_NODEMANAGER_HEAPSIZE}" ]]; then HADOOP_HEAPSIZE_MAX="${YARN_NODEMANAGER_HEAPSIZE}" @@ -132,8 +118,6 @@ function yarncmd_case proxyserver) HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" HADOOP_CLASSNAME='org.apache.hadoop.yarn.server.webproxy.WebAppProxyServer' - hadoop_debug "Append YARN_PROXYSERVER_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_PROXYSERVER_OPTS}" # Backwards compatibility if [[ -n "${YARN_PROXYSERVER_HEAPSIZE}" ]]; then # shellcheck disable=SC2034 @@ -142,14 +126,10 @@ function yarncmd_case ;; queue) HADOOP_CLASSNAME=org.apache.hadoop.yarn.client.cli.QueueCLI - hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS" - 
HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}" ;; resourcemanager) HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" HADOOP_CLASSNAME='org.apache.hadoop.yarn.server.resourcemanager.ResourceManager' - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_RESOURCEMANAGER_OPTS}" - hadoop_debug "Append YARN_RESOURCEMANAGER_OPTS onto HADOOP_OPTS" # Backwards compatibility if [[ -n "${YARN_RESOURCEMANAGER_HEAPSIZE}" ]]; then # shellcheck disable=SC2034 @@ -158,31 +138,21 @@ function yarncmd_case ;; rmadmin) HADOOP_CLASSNAME='org.apache.hadoop.yarn.client.cli.RMAdminCLI' - hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}" ;; scmadmin) HADOOP_CLASSNAME='org.apache.hadoop.yarn.client.SCMAdmin' - hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}" ;; sharedcachemanager) HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" HADOOP_CLASSNAME='org.apache.hadoop.yarn.server.sharedcachemanager.SharedCacheManager' - hadoop_debug "Append YARN_SHAREDCACHEMANAGER_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_SHAREDCACHEMANAGER_OPTS}" ;; timelinereader) HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" HADOOP_CLASSNAME='org.apache.hadoop.yarn.server.timelineservice.reader.TimelineReaderServer' - hadoop_debug "Append YARN_TIMELINEREADER_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_TIMELINEREADER_OPTS}" ;; timelineserver) HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" HADOOP_CLASSNAME='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer' - hadoop_debug "Append YARN_TIMELINESERVER_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_TIMELINESERVER_OPTS}" # Backwards compatibility if [[ -n "${YARN_TIMELINESERVER_HEAPSIZE}" ]]; then # shellcheck disable=SC2034 @@ -191,8 +161,6 @@ function yarncmd_case ;; version) HADOOP_CLASSNAME=org.apache.hadoop.util.VersionInfo - hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} 
${YARN_CLIENT_OPTS}" ;; top) doNotSetCols=0 @@ -222,8 +190,6 @@ function yarncmd_case fi fi HADOOP_CLASSNAME=org.apache.hadoop.yarn.client.cli.TopCLI - hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}" HADOOP_SUBCMD_ARGS=("$@") ;; *) @@ -262,6 +228,8 @@ fi HADOOP_SUBCMD=$1 shift +hadoop_verify_user "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}" + HADOOP_SUBCMD_ARGS=("$@") if declare -f yarn_subcommand_"${HADOOP_SUBCMD}" >/dev/null 2>&1; then @@ -271,15 +239,30 @@ else yarncmd_case "${HADOOP_SUBCMD}" "${HADOOP_SUBCMD_ARGS[@]}" fi -hadoop_verify_user "${HADOOP_SUBCMD}" +# It's unclear if YARN_CLIENT_OPTS is actually a useful +# thing to have separate from HADOOP_CLIENT_OPTS. Someone +# might use it, so let's not deprecate it and just override +# HADOOP_CLIENT_OPTS instead before we (potentially) add it +# to the command line +if [[ -n "${YARN_CLIENT_OPTS}" ]]; then + # shellcheck disable=SC2034 + HADOOP_CLIENT_OPTS=${YARN_CLIENT_OPTS} +fi + +hadoop_add_client_opts if [[ ${HADOOP_WORKER_MODE} = true ]]; then hadoop_common_worker_mode_execute "${HADOOP_YARN_HOME}/bin/yarn" "${HADOOP_USER_PARAMS[@]}" exit $? fi +hadoop_subcommand_opts "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}" + if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then HADOOP_SECURE_USER="${HADOOP_SUBCMD_SECUREUSER}" + + hadoop_subcommand_secure_opts "${HADOOP_SHELL_EXECNAME}" "${HADOOP_SUBCMD}" + hadoop_verify_secure_prereq hadoop_setup_secure_service priv_outfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out"