From 730bc746f9ac6e045e94dc2bc622b16de0159b4b Mon Sep 17 00:00:00 2001 From: Allen Wittenauer Date: Mon, 28 Mar 2016 09:00:07 -0700 Subject: [PATCH] HADOOP-12930. Dynamic subcommands for hadoop shell scripts (aw) This commit contains the following JIRA issues: HADOOP-12931. bin/hadoop work for dynamic subcommands HADOOP-12932. bin/yarn work for dynamic subcommands HADOOP-12933. bin/hdfs work for dynamic subcommands HADOOP-12934. bin/mapred work for dynamic subcommands HADOOP-12935. API documentation for dynamic subcommands HADOOP-12936. modify hadoop-tools to take advantage of dynamic subcommands HADOOP-13086. enable daemonization of dynamic commands HADOOP-13087. env var doc update for dynamic commands HADOOP-13088. fix shellprofiles in hadoop-tools to allow replacement HADOOP-13089. hadoop distcp adds client opts twice when dynamic HADOOP-13094. hadoop-common unit tests for dynamic commands HADOOP-13095. hadoop-hdfs unit tests for dynamic commands HADOOP-13107. clean up how rumen is executed HADOOP-13108. dynamic subcommands need a way to manipulate arguments HADOOP-13110. add a streaming subcommand to mapred HADOOP-13111. convert hadoop gridmix to be dynamic HADOOP-13115. dynamic subcommand docs should talk about exit vs. continue program flow HADOOP-13117. clarify daemonization and security vars for dynamic commands HADOOP-13120. add a --debug message when dynamic commands have been used HADOOP-13121. rename sub-project shellprofiles to match the rest of Hadoop HADOOP-13129. fix typo in dynamic subcommand docs HADOOP-13151. Underscores should be escaped in dynamic subcommands document HADOOP-13153. 
fix typo in debug statement for dynamic subcommands --- .../resources/assemblies/hadoop-tools.xml | 56 +++ .../hadoop-common/src/main/bin/hadoop | 319 +++++++------ .../src/site/markdown/UnixShellGuide.md | 87 +++- .../scripts/hadoop-functions_test_helper.bash | 4 +- .../src/test/scripts/hadoop_subcommands.bats | 78 ++++ hadoop-hdfs-project/hadoop-hdfs/pom.xml | 38 +- .../hadoop-hdfs/src/main/bin/hdfs | 430 ++++++++++-------- .../{hdfs.sh => hadoop-hdfs.sh} | 2 +- .../scripts/hdfs-functions_test_helper.bash | 58 +++ .../src/test/scripts/hdfs_subcommands.bats | 78 ++++ .../hadoop-hdfs/src/test/scripts/run-bats.sh | 43 ++ hadoop-mapreduce-project/bin/mapred | 230 +++++----- .../{mapreduce.sh => hadoop-mapreduce.sh} | 2 +- .../shellprofile.d/hadoop-archive-logs.sh | 39 ++ .../main/shellprofile.d/hadoop-archives.sh | 58 +++ .../src/main/shellprofile.d/hadoop-distcp.sh | 62 +++ .../src/main/shellprofile.d/hadoop-extras.sh | 39 ++ .../src/main/shellprofile.d/hadoop-gridmix.sh | 36 ++ .../src/main/shellprofile.d/hadoop-rumen.sh | 58 +++ .../src/site/markdown/Rumen.md.vm | 40 +- .../hadoop/streaming/DumpTypedBytes.java | 3 +- .../hadoop/streaming/HadoopStreaming.java | 3 +- .../hadoop/streaming/LoadTypedBytes.java | 3 +- .../main/shellprofile.d/hadoop-streaming.sh | 55 +++ .../src/site/markdown/HadoopStreaming.md.vm | 30 +- hadoop-yarn-project/hadoop-yarn/bin/yarn | 394 +++++++++------- .../{yarn.sh => hadoop-yarn.sh} | 2 +- 27 files changed, 1583 insertions(+), 664 deletions(-) create mode 100755 hadoop-common-project/hadoop-common/src/test/scripts/hadoop_subcommands.bats rename hadoop-hdfs-project/hadoop-hdfs/src/main/shellprofile.d/{hdfs.sh => hadoop-hdfs.sh} (98%) create mode 100755 hadoop-hdfs-project/hadoop-hdfs/src/test/scripts/hdfs-functions_test_helper.bash create mode 100755 hadoop-hdfs-project/hadoop-hdfs/src/test/scripts/hdfs_subcommands.bats create mode 100755 hadoop-hdfs-project/hadoop-hdfs/src/test/scripts/run-bats.sh rename 
hadoop-mapreduce-project/shellprofile.d/{mapreduce.sh => hadoop-mapreduce.sh} (98%) create mode 100755 hadoop-tools/hadoop-archive-logs/src/main/shellprofile.d/hadoop-archive-logs.sh create mode 100755 hadoop-tools/hadoop-archives/src/main/shellprofile.d/hadoop-archives.sh create mode 100755 hadoop-tools/hadoop-distcp/src/main/shellprofile.d/hadoop-distcp.sh create mode 100755 hadoop-tools/hadoop-extras/src/main/shellprofile.d/hadoop-extras.sh create mode 100755 hadoop-tools/hadoop-gridmix/src/main/shellprofile.d/hadoop-gridmix.sh create mode 100755 hadoop-tools/hadoop-rumen/src/main/shellprofile.d/hadoop-rumen.sh create mode 100755 hadoop-tools/hadoop-streaming/src/main/shellprofile.d/hadoop-streaming.sh rename hadoop-yarn-project/hadoop-yarn/shellprofile.d/{yarn.sh => hadoop-yarn.sh} (99%) diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-tools.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-tools.xml index f8ba48e568..bc9548bebb 100644 --- a/hadoop-assemblies/src/main/resources/assemblies/hadoop-tools.xml +++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-tools.xml @@ -23,6 +23,38 @@ false + + ../hadoop-archive-logs/src/main/shellprofile.d + + * + + /libexec/shellprofile.d + 0755 + + + ../hadoop-archives/src/main/shellprofile.d + + * + + /libexec/shellprofile.d + 0755 + + + ../hadoop-distcp/src/main/shellprofile.d + + * + + /libexec/shellprofile.d + 0755 + + + ../hadoop-extras/src/main/shellprofile.d + + * + + /libexec/shellprofile.d + 0755 + ../hadoop-pipes/src/main/native/pipes/api/hadoop @@ -93,6 +125,14 @@ *-sources.jar + + ../hadoop-gridmix/src/main/shellprofile.d + + * + + /libexec/shellprofile.d + 0755 + ../hadoop-rumen/target /share/hadoop/${hadoop.component}/sources @@ -100,6 +140,14 @@ *-sources.jar + + ../hadoop-rumen/src/main/shellprofile.d + + * + + /libexec/shellprofile.d + 0755 + ../hadoop-streaming/target /share/hadoop/${hadoop.component}/sources @@ -107,6 +155,14 @@ *-sources.jar + + 
../hadoop-streaming/src/main/shellprofile.d + + * + + /libexec/shellprofile.d + 0755 + ../hadoop-sls/target /share/hadoop/${hadoop.component}/sources diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop b/hadoop-common-project/hadoop-common/src/main/bin/hadoop index fccb9f8319..6cf872c5eb 100755 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop @@ -16,7 +16,12 @@ # limitations under the License. MYNAME="${BASH_SOURCE-$0}" +HADOOP_SHELL_EXECNAME="${MYNAME##*/}" +## @description build up the hadoop command's usage text. +## @audience public +## @stability stable +## @replaceable no function hadoop_usage { hadoop_add_option "buildpaths" "attempt to add class files from build tree" @@ -25,25 +30,141 @@ function hadoop_usage hadoop_add_option "hosts filename" "list of hosts to use in slave mode" hadoop_add_option "slaves" "turn on slave mode" - hadoop_add_subcommand "archive" "create a Hadoop archive" hadoop_add_subcommand "checknative" "check native Hadoop and compression libraries availability" hadoop_add_subcommand "classpath" "prints the class path needed to get the Hadoop jar and the required libraries" hadoop_add_subcommand "conftest" "validate configuration XML files" hadoop_add_subcommand "credential" "interact with credential providers" hadoop_add_subcommand "daemonlog" "get/set the log level for each daemon" - hadoop_add_subcommand "distch" "distributed metadata changer" - hadoop_add_subcommand "distcp" "copy file or directories recursively" hadoop_add_subcommand "dtutil" "operations related to delegation tokens" hadoop_add_subcommand "envvars" "display computed Hadoop environment variables" hadoop_add_subcommand "fs" "run a generic filesystem user client" - hadoop_add_subcommand "gridmix" "submit a mix of synthetic job, modeling a profiled from production load" hadoop_add_subcommand "jar " "run a jar file. 
NOTE: please use \"yarn jar\" to launch YARN applications, not this command." hadoop_add_subcommand "jnipath" "prints the java.library.path" hadoop_add_subcommand "kerbname" "show auth_to_local principal conversion" hadoop_add_subcommand "key" "manage keys via the KeyProvider" hadoop_add_subcommand "trace" "view and modify Hadoop tracing settings" hadoop_add_subcommand "version" "print the version" - hadoop_generate_usage "${MYNAME}" true + hadoop_generate_usage "${HADOOP_SHELL_EXECNAME}" true +} + +## @description Default command handler for hadoop command +## @audience public +## @stability stable +## @replaceable no +## @param CLI arguments +function hadoopcmd_case +{ + subcmd=$1 + shift + + case ${subcmd} in + balancer|datanode|dfs|dfsadmin|dfsgroups| \ + namenode|secondarynamenode|fsck|fetchdt|oiv| \ + portmap|nfs3) + hadoop_error "WARNING: Use of this script to execute ${subcmd} is deprecated." + subcmd=${subcmd/dfsgroups/groups} + hadoop_error "WARNING: Attempting to execute replacement \"hdfs ${subcmd}\" instead." + hadoop_error "" + #try to locate hdfs and if present, delegate to it. + if [[ -f "${HADOOP_HDFS_HOME}/bin/hdfs" ]]; then + # shellcheck disable=SC2086 + exec "${HADOOP_HDFS_HOME}/bin/hdfs" \ + --config "${HADOOP_CONF_DIR}" "${subcmd}" "$@" + elif [[ -f "${HADOOP_HOME}/bin/hdfs" ]]; then + # shellcheck disable=SC2086 + exec "${HADOOP_HOME}/bin/hdfs" \ + --config "${HADOOP_CONF_DIR}" "${subcmd}" "$@" + else + hadoop_error "HADOOP_HDFS_HOME not found!" + exit 1 + fi + ;; + + #mapred commands for backwards compatibility + pipes|job|queue|mrgroups|mradmin|jobtracker|tasktracker) + hadoop_error "WARNING: Use of this script to execute ${subcmd} is deprecated." + subcmd=${subcmd/mrgroups/groups} + hadoop_error "WARNING: Attempting to execute replacement \"mapred ${subcmd}\" instead." + hadoop_error "" + #try to locate mapred and if present, delegate to it. 
+ if [[ -f "${HADOOP_MAPRED_HOME}/bin/mapred" ]]; then + exec "${HADOOP_MAPRED_HOME}/bin/mapred" \ + --config "${HADOOP_CONF_DIR}" "${subcmd}" "$@" + elif [[ -f "${HADOOP_HOME}/bin/mapred" ]]; then + exec "${HADOOP_HOME}/bin/mapred" \ + --config "${HADOOP_CONF_DIR}" "${subcmd}" "$@" + else + hadoop_error "HADOOP_MAPRED_HOME not found!" + exit 1 + fi + ;; + checknative) + HADOOP_CLASSNAME=org.apache.hadoop.util.NativeLibraryChecker + ;; + classpath) + hadoop_do_classpath_subcommand HADOOP_CLASSNAME "$@" + ;; + conftest) + HADOOP_CLASSNAME=org.apache.hadoop.util.ConfTest + ;; + credential) + HADOOP_CLASSNAME=org.apache.hadoop.security.alias.CredentialShell + ;; + daemonlog) + HADOOP_CLASSNAME=org.apache.hadoop.log.LogLevel + ;; + dtutil) + HADOOP_CLASSNAME=org.apache.hadoop.security.token.DtUtilShell + ;; + envvars) + echo "JAVA_HOME='${JAVA_HOME}'" + echo "HADOOP_COMMON_HOME='${HADOOP_COMMON_HOME}'" + echo "HADOOP_COMMON_DIR='${HADOOP_COMMON_DIR}'" + echo "HADOOP_COMMON_LIB_JARS_DIR='${HADOOP_COMMON_LIB_JARS_DIR}'" + echo "HADOOP_COMMON_LIB_NATIVE_DIR='${HADOOP_COMMON_LIB_NATIVE_DIR}'" + echo "HADOOP_CONF_DIR='${HADOOP_CONF_DIR}'" + echo "HADOOP_TOOLS_HOME='${HADOOP_TOOLS_HOME}'" + echo "HADOOP_TOOLS_DIR='${HADOOP_TOOLS_DIR}'" + echo "HADOOP_TOOLS_LIB_JARS_DIR='${HADOOP_TOOLS_LIB_JARS_DIR}'" + exit 0 + ;; + fs) + HADOOP_CLASSNAME=org.apache.hadoop.fs.FsShell + ;; + jar) + if [[ -n "${YARN_OPTS}" ]] || [[ -n "${YARN_CLIENT_OPTS}" ]]; then + hadoop_error "WARNING: Use \"yarn jar\" to launch YARN applications." + fi + HADOOP_CLASSNAME=org.apache.hadoop.util.RunJar + ;; + jnipath) + hadoop_finalize + echo "${JAVA_LIBRARY_PATH}" + exit 0 + ;; + kerbname) + HADOOP_CLASSNAME=org.apache.hadoop.security.HadoopKerberosName + ;; + key) + HADOOP_CLASSNAME=org.apache.hadoop.crypto.key.KeyShell + ;; + trace) + HADOOP_CLASSNAME=org.apache.hadoop.tracing.TraceAdmin + ;; + version) + HADOOP_CLASSNAME=org.apache.hadoop.util.VersionInfo + ;; + *) + HADOOP_CLASSNAME="${subcmd}" + if ! 
hadoop_validate_classname "${HADOOP_CLASSNAME}"; then + hadoop_exit_with_usage 1 + fi + ;; + esac + + # Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS + hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" } # This script runs the hadoop core commands. @@ -70,142 +191,76 @@ if [ $# = 0 ]; then hadoop_exit_with_usage 1 fi -COMMAND=$1 +HADOOP_SUBCMD=$1 shift -case ${COMMAND} in - balancer|datanode|dfs|dfsadmin|dfsgroups| \ - namenode|secondarynamenode|fsck|fetchdt|oiv| \ - portmap|nfs3) - hadoop_error "WARNING: Use of this script to execute ${COMMAND} is deprecated." - COMMAND=${COMMAND/dfsgroups/groups} - hadoop_error "WARNING: Attempting to execute replacement \"hdfs ${COMMAND}\" instead." - hadoop_error "" - #try to locate hdfs and if present, delegate to it. - if [[ -f "${HADOOP_HDFS_HOME}/bin/hdfs" ]]; then - # shellcheck disable=SC2086 - exec "${HADOOP_HDFS_HOME}/bin/hdfs" \ - --config "${HADOOP_CONF_DIR}" "${COMMAND}" "$@" - elif [[ -f "${HADOOP_HOME}/bin/hdfs" ]]; then - # shellcheck disable=SC2086 - exec "${HADOOP_HOME}/bin/hdfs" \ - --config "${HADOOP_CONF_DIR}" "${COMMAND}" "$@" - else - hadoop_error "HADOOP_HDFS_HOME not found!" - exit 1 - fi - ;; +HADOOP_SUBCMD_ARGS=("$@") - #mapred commands for backwards compatibility - pipes|job|queue|mrgroups|mradmin|jobtracker|tasktracker) - hadoop_error "WARNING: Use of this script to execute ${COMMAND} is deprecated." - COMMAND=${COMMAND/mrgroups/groups} - hadoop_error "WARNING: Attempting to execute replacement \"mapred ${COMMAND}\" instead." - hadoop_error "" - #try to locate mapred and if present, delegate to it. - if [[ -f "${HADOOP_MAPRED_HOME}/bin/mapred" ]]; then - exec "${HADOOP_MAPRED_HOME}/bin/mapred" \ - --config "${HADOOP_CONF_DIR}" "${COMMAND}" "$@" - elif [[ -f "${HADOOP_HOME}/bin/mapred" ]]; then - exec "${HADOOP_HOME}/bin/mapred" \ - --config "${HADOOP_CONF_DIR}" "${COMMAND}" "$@" - else - hadoop_error "HADOOP_MAPRED_HOME not found!" 
- exit 1 - fi - ;; - archive) - CLASS=org.apache.hadoop.tools.HadoopArchives - hadoop_add_to_classpath_tools hadoop-archives - ;; - checknative) - CLASS=org.apache.hadoop.util.NativeLibraryChecker - ;; - classpath) - hadoop_do_classpath_subcommand CLASS "$@" - ;; - conftest) - CLASS=org.apache.hadoop.util.ConfTest - ;; - credential) - CLASS=org.apache.hadoop.security.alias.CredentialShell - ;; - daemonlog) - CLASS=org.apache.hadoop.log.LogLevel - ;; - distch) - CLASS=org.apache.hadoop.tools.DistCh - hadoop_add_to_classpath_tools hadoop-extras - ;; - distcp) - CLASS=org.apache.hadoop.tools.DistCp - hadoop_add_to_classpath_tools hadoop-distcp - ;; - dtutil) - CLASS=org.apache.hadoop.security.token.DtUtilShell - ;; - envvars) - echo "JAVA_HOME='${JAVA_HOME}'" - echo "HADOOP_COMMON_HOME='${HADOOP_COMMON_HOME}'" - echo "HADOOP_COMMON_DIR='${HADOOP_COMMON_DIR}'" - echo "HADOOP_COMMON_LIB_JARS_DIR='${HADOOP_COMMON_LIB_JARS_DIR}'" - echo "HADOOP_COMMON_LIB_NATIVE_DIR='${HADOOP_COMMON_LIB_NATIVE_DIR}'" - echo "HADOOP_CONF_DIR='${HADOOP_CONF_DIR}'" - echo "HADOOP_TOOLS_HOME='${HADOOP_TOOLS_HOME}'" - echo "HADOOP_TOOLS_DIR='${HADOOP_TOOLS_DIR}'" - echo "HADOOP_TOOLS_LIB_JARS_DIR='${HADOOP_TOOLS_LIB_JARS_DIR}'" - exit 0 - ;; - fs) - CLASS=org.apache.hadoop.fs.FsShell - ;; - gridmix) - CLASS=org.apache.hadoop.mapred.gridmix.Gridmix - hadoop_add_to_classpath_tools hadoop-rumen - hadoop_add_to_classpath_tools hadoop-gridmix - ;; - jar) - if [[ -n "${YARN_OPTS}" ]] || [[ -n "${YARN_CLIENT_OPTS}" ]]; then - hadoop_error "WARNING: Use \"yarn jar\" to launch YARN applications." - fi - CLASS=org.apache.hadoop.util.RunJar - ;; - jnipath) - hadoop_finalize - echo "${JAVA_LIBRARY_PATH}" - exit 0 - ;; - kerbname) - CLASS=org.apache.hadoop.security.HadoopKerberosName - ;; - key) - CLASS=org.apache.hadoop.crypto.key.KeyShell - ;; - trace) - CLASS=org.apache.hadoop.tracing.TraceAdmin - ;; - version) - CLASS=org.apache.hadoop.util.VersionInfo - ;; - *) - CLASS="${COMMAND}" - if ! 
hadoop_validate_classname "${CLASS}"; then - hadoop_exit_with_usage 1 - fi - ;; -esac +if declare -f hadoop_subcommand_"${HADOOP_SUBCMD}" >/dev/null 2>&1; then + hadoop_debug "Calling dynamically: hadoop_subcommand_${HADOOP_SUBCMD} ${HADOOP_SUBCMD_ARGS[*]}" + "hadoop_subcommand_${HADOOP_SUBCMD}" "${HADOOP_SUBCMD_ARGS[@]}" +else + hadoopcmd_case "${HADOOP_SUBCMD}" "${HADOOP_SUBCMD_ARGS[@]}" +fi -hadoop_verify_user "${COMMAND}" +hadoop_verify_user "${HADOOP_SUBCMD}" if [[ ${HADOOP_SLAVE_MODE} = true ]]; then - hadoop_common_slave_mode_execute "${HADOOP_HDFS_HOME}/bin/hdfs" "${HADOOP_USER_PARAMS[@]}" + hadoop_common_slave_mode_execute "${HADOOP_COMMON_HOME}/bin/hadoop" "${HADOOP_USER_PARAMS[@]}" exit $? fi -# Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS -hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" -HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" +if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then + HADOOP_SECURE_USER="${HADOOP_SUBCMD_SECUREUSER}" + hadoop_verify_secure_prereq + hadoop_setup_secure_service + priv_outfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out" + priv_errfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.err" + priv_pidfile="${HADOOP_PID_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid" + daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out" + daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid" +else + daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out" + daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid" +fi + +if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then + # shellcheck disable=SC2034 + HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}" + if [[ -n "${HADOOP_SUBCMD_SECURESERVICE}" ]]; then + # shellcheck disable=SC2034 + 
HADOOP_LOGFILE="hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.log" + else + # shellcheck disable=SC2034 + HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.log" + fi +fi hadoop_finalize -hadoop_java_exec "${COMMAND}" "${CLASS}" "$@" +if [[ "${HADOOP_SUBCMD_SUPPORTDAEMONIZATION}" = true ]]; then + if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then + hadoop_secure_daemon_handler \ + "${HADOOP_DAEMON_MODE}" \ + "${HADOOP_SUBCMD}" \ + "${HADOOP_CLASSNAME}" \ + "${daemon_pidfile}" \ + "${daemon_outfile}" \ + "${priv_pidfile}" \ + "${priv_outfile}" \ + "${priv_errfile}" \ + "${HADOOP_SUBCMD_ARGS[@]}" + else + hadoop_daemon_handler \ + "${HADOOP_DAEMON_MODE}" \ + "${HADOOP_SUBCMD}" \ + "${HADOOP_CLASSNAME}" \ + "${daemon_pidfile}" \ + "${daemon_outfile}" \ + "${HADOOP_SUBCMD_ARGS[@]}" + fi + exit $? +else + # shellcheck disable=SC2086 + hadoop_java_exec "${HADOOP_SUBCMD}" "${HADOOP_CLASSNAME}" "${HADOOP_SUBCMD_ARGS[@]}" +fi diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md b/hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md index a5fa10c296..b6d7517faf 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md @@ -85,11 +85,11 @@ Apache Hadoop allows for third parties to easily add new features through a vari Core to this functionality is the concept of a shell profile. Shell profiles are shell snippets that can do things such as add jars to the classpath, configure Java system properties and more. -Shell profiles may be installed in either `${HADOOP_CONF_DIR}/shellprofile.d` or `${HADOOP_HOME}/libexec/shellprofile.d`. Shell profiles in the `libexec` directory are part of the base installation and cannot be overriden by the user. 
Shell profiles in the configuration directory may be ignored if the end user changes the configuration directory at runtime. +Shell profiles may be installed in either `${HADOOP_CONF_DIR}/shellprofile.d` or `${HADOOP_HOME}/libexec/shellprofile.d`. Shell profiles in the `libexec` directory are part of the base installation and cannot be overridden by the user. Shell profiles in the configuration directory may be ignored if the end user changes the configuration directory at runtime. An example of a shell profile is in the libexec directory. -## Shell API +### Shell API Apache Hadoop's shell code has a [function library](./UnixShellAPI.html) that is open for administrators and developers to use to assist in their configuration and advanced feature management. These APIs follow the standard [Apache Hadoop Interface Classification](./InterfaceClassification.html), with one addition: Replaceable. @@ -97,10 +97,8 @@ The shell code allows for core functions to be overridden. However, not all func In order to replace a function, create a file called `hadoop-user-functions.sh` in the `${HADOOP_CONF_DIR}` directory. Simply define the new, replacement function in this file and the system will pick it up automatically. There may be as many replacement functions as needed in this file. Examples of function replacement are in the `hadoop-user-functions.sh.examples` file. - Functions that are marked Public and Stable are safe to use in shell profiles as-is. Other functions may change in a minor release. - ### User-level API Access In addition to `.hadoop-env`, which allows individual users to override `hadoop-env.sh`, user's may also use `.hadooprc`. This is called after the Apache Hadoop shell environment has been configured and allows the full set of shell API function calls. 
@@ -112,3 +110,84 @@ hadoop_add_classpath /some/path/custom.jar ``` would go into `.hadooprc` + +### Dynamic Subcommands + +Utilizing the Shell API, it is possible for third parties to add their own subcommands to the primary Hadoop shell scripts (hadoop, hdfs, mapred, yarn). + +Prior to executing a subcommand, the primary scripts will check for the existence of a (scriptname)\_subcommand\_(subcommand) function. This function gets executed with the parameters set to all remaining command line arguments. For example, if the following function is defined: + +```bash +function yarn_subcommand_hello +{ + echo "$@" + exit $? +} +``` + +then executing `yarn --debug hello world I see you` will activate script debugging and call the `yarn_subcommand_hello` function as: + +```bash +yarn_subcommand_hello world I see you +``` + +which will result in the output of: + +```bash +world I see you +``` + +It is also possible to add the new subcommands to the usage output. The `hadoop_add_subcommand` function adds text to the usage output. Utilizing the standard HADOOP_SHELL_EXECNAME variable, we can limit which command gets our new function. + +```bash +if [[ "${HADOOP_SHELL_EXECNAME}" = "yarn" ]]; then + hadoop_add_subcommand "hello" "Print some text to the screen" +fi +``` + +This functionality may also be used to override the built-ins. For example, defining: + +```bash +function hdfs_subcommand_fetchdt +{ + ... +} +``` + +... will replace the existing `hdfs fetchdt` subcommand with a custom one. + +Some key environment variables related to Dynamic Subcommands: + +* HADOOP\_CLASSNAME + +This is the name of the Java class to use when program execution continues. + +* HADOOP\_SHELL\_EXECNAME + +This is the name of the script that is being executed. It will be one of hadoop, hdfs, mapred, or yarn. + +* HADOOP\_SUBCMD + +This is the subcommand that was passed on the command line. 
+ +* HADOOP\_SUBCMD\_ARGS + +This array contains the argument list after the Apache Hadoop common argument processing has taken place and is the same list that is passed to the subcommand function as arguments. For example, if `hadoop --debug subcmd 1 2 3` has been executed on the command line, then `${HADOOP_SUBCMD_ARGS[0]}` will be 1 and `hadoop_subcommand_subcmd` will also have $1 equal to 1. This array list MAY be modified by subcommand functions to add or delete values from the argument list for further processing. + +* HADOOP\_SUBCMD\_SECURESERVICE + +If this command should/will be executed as a secure daemon, set this to true. + +* HADOOP\_SUBCMD\_SECUREUSER + +If this command should/will be executed as a secure daemon, set the user name to be used. + +* HADOOP\_SUBCMD\_SUPPORTDAEMONIZATION + +If this command can be executed as a daemon, set this to true. + +* HADOOP\_USER\_PARAMS + +This is the full content of the command line, prior to any parsing done. It will contain flags such as `--debug`. It MAY NOT be manipulated. + +The Apache Hadoop runtime facilities require functions exit if no further processing is required. For example, in the hello example above, Java and other facilities were not required so a simple `exit $?` was sufficient. However, if the function were to utilize `HADOOP_CLASSNAME`, then program execution must continue so that Java with the Apache Hadoop-specific parameters will be launched against the given Java class. Another example would be in the case of an unrecoverable error. It is the function's responsibility to print an appropriate message (preferably using the hadoop_error API call) and exit appropriately. 
diff --git a/hadoop-common-project/hadoop-common/src/test/scripts/hadoop-functions_test_helper.bash b/hadoop-common-project/hadoop-common/src/test/scripts/hadoop-functions_test_helper.bash index be2d7f58b2..cc37268f93 100755 --- a/hadoop-common-project/hadoop-common/src/test/scripts/hadoop-functions_test_helper.bash +++ b/hadoop-common-project/hadoop-common/src/test/scripts/hadoop-functions_test_helper.bash @@ -16,7 +16,7 @@ setup() { - TMP=../../../target/test-dir/bats.$$.${RANDOM} + TMP="${BATS_TEST_DIRNAME}/../../../target/test-dir/bats.$$.${RANDOM}" mkdir -p ${TMP} TMP=$(cd -P -- "${TMP}" >/dev/null && pwd -P) export TMP @@ -38,7 +38,7 @@ setup() { # shellcheck disable=SC2034 QATESTMODE=true - . ../../main/bin/hadoop-functions.sh + . "${BATS_TEST_DIRNAME}/../../main/bin/hadoop-functions.sh" pushd "${TMP}" >/dev/null } diff --git a/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_subcommands.bats b/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_subcommands.bats new file mode 100755 index 0000000000..c004a30d99 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/scripts/hadoop_subcommands.bats @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +load hadoop-functions_test_helper + +# the loading of shell profiles is tested elsewhere +# this only tests the specific subcommand parts + +subcommandsetup () { + export HADOOP_LIBEXEC_DIR="${TMP}/libexec" + export HADOOP_CONF_DIR="${TMP}/conf" + mkdir -p "${HADOOP_LIBEXEC_DIR}" + echo ". \"${BATS_TEST_DIRNAME}/../../main/bin/hadoop-functions.sh\"" > "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" + cat <<-'TOKEN' >> "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" + +hadoop_subcommand_sub () { + echo "unittest" + exit 0 +} + +hadoop_subcommand_conftest () +{ + echo conftest + exit 0 +} + +hadoop_subcommand_envcheck () +{ + echo ${HADOOP_SHELL_EXECNAME} + exit 0 +} + +hadoop_subcommand_multi () +{ + echo $2 + exit 0 +} +TOKEN + chmod a+rx "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" +} + +@test "hadoop_subcommand (addition)" { + subcommandsetup + run "${BATS_TEST_DIRNAME}/../../main/bin/hadoop" sub + echo ">${output}<" + [ "${output}" = unittest ] +} + +@test "hadoop_subcommand (substitute)" { + subcommandsetup + run "${BATS_TEST_DIRNAME}/../../main/bin/hadoop" conftest + echo ">${output}<" + [ "${output}" = conftest ] +} + +@test "hadoop_subcommand (envcheck)" { + subcommandsetup + run "${BATS_TEST_DIRNAME}/../../main/bin/hadoop" envcheck + [ "${output}" = hadoop ] +} + +@test "hadoop_subcommand (multiparams)" { + subcommandsetup + run "${BATS_TEST_DIRNAME}/../../main/bin/hadoop" multi 1 2 + [ "${output}" = 2 ] +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs/pom.xml index 2d534bfd86..3696797dfc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/pom.xml @@ -417,7 +417,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> - + startKdc @@ -559,5 +559,41 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> + + + + shelltest + + + !skipTests + + + + + + maven-antrun-plugin + + + hdfs-test-bats-driver + test + + run + + + + + + + + + + + + + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs index c365250f59..2a29d1766e 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs @@ -16,7 +16,12 @@ # limitations under the License. MYNAME="${BASH_SOURCE-$0}" +HADOOP_SHELL_EXECNAME="${MYNAME##*/}" +## @description build up the hdfs command's usage text. +## @audience public +## @stability stable +## @replaceable no function hadoop_usage { hadoop_add_option "--buildpaths" "attempt to add class files from build tree" @@ -56,7 +61,194 @@ function hadoop_usage hadoop_add_subcommand "storagepolicies" "list/get/set block storage policies" hadoop_add_subcommand "version" "print the version" hadoop_add_subcommand "zkfc" "run the ZK Failover Controller daemon" - hadoop_generate_usage "${MYNAME}" false + hadoop_generate_usage "${HADOOP_SHELL_EXECNAME}" false +} + +## @description Default command handler for hadoop command +## @audience public +## @stability stable +## @replaceable no +## @param CLI arguments +function hdfscmd_case +{ + subcmd=$1 + shift + + case ${subcmd} in + balancer) + HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" + HADOOP_CLASSNAME=org.apache.hadoop.hdfs.server.balancer.Balancer + hadoop_debug "Appending HADOOP_BALANCER_OPTS onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_BALANCER_OPTS}" + ;; + cacheadmin) + HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.CacheAdmin + ;; + classpath) + hadoop_do_classpath_subcommand HADOOP_CLASSNAME "$@" + ;; + crypto) + HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.CryptoAdmin + ;; + datanode) + HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" + # Determine if we're starting a secure datanode, and + # if so, redefine appropriate variables + if [[ -n "${HADOOP_SECURE_DN_USER}" ]]; then + HADOOP_SUBCMD_SECURESERVICE="true" + HADOOP_SUBCMD_SECUREUSER="${HADOOP_SECURE_DN_USER}" + + # backward compatiblity + HADOOP_SECURE_PID_DIR="${HADOOP_SECURE_PID_DIR:-$HADOOP_SECURE_DN_PID_DIR}" + 
HADOOP_SECURE_LOG_DIR="${HADOOP_SECURE_LOG_DIR:-$HADOOP_SECURE_DN_LOG_DIR}" + + hadoop_debug "Appending HADOOP_DATANODE_OPTS onto HADOOP_OPTS" + hadoop_debug "Appending HADOOP_DN_SECURE_EXTRA_OPTS onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_DATANODE_OPTS} ${HADOOP_DN_SECURE_EXTRA_OPTS}" + HADOOP_CLASSNAME="org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter" + else + hadoop_debug "Appending HADOOP_DATANODE_OPTS onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_DATANODE_OPTS}" + HADOOP_CLASSNAME='org.apache.hadoop.hdfs.server.datanode.DataNode' + fi + ;; + debug) + HADOOP_CLASSNAME='org.apache.hadoop.hdfs.tools.DebugAdmin' + ;; + dfs) + HADOOP_CLASSNAME=org.apache.hadoop.fs.FsShell + hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" + ;; + dfsadmin) + HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.DFSAdmin + hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" + ;; + envvars) + echo "JAVA_HOME='${JAVA_HOME}'" + echo "HADOOP_HDFS_HOME='${HADOOP_HDFS_HOME}'" + echo "HDFS_DIR='${HDFS_DIR}'" + echo "HDFS_LIB_JARS_DIR='${HDFS_LIB_JARS_DIR}'" + echo "HADOOP_CONF_DIR='${HADOOP_CONF_DIR}'" + echo "HADOOP_TOOLS_HOME='${HADOOP_TOOLS_HOME}'" + echo "HADOOP_TOOLS_DIR='${HADOOP_TOOLS_DIR}'" + echo "HADOOP_TOOLS_LIB_JARS_DIR='${HADOOP_TOOLS_LIB_JARS_DIR}'" + exit 0 + ;; + erasurecode) + HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.erasurecode.ECCli + hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" + ;; + fetchdt) + HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.DelegationTokenFetcher + ;; + fsck) + HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.DFSck + hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" + ;; + getconf) + HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.GetConf + ;; + groups) + 
HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.GetGroups + ;; + haadmin) + HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.DFSHAAdmin + hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" + ;; + journalnode) + HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" + HADOOP_CLASSNAME='org.apache.hadoop.hdfs.qjournal.server.JournalNode' + hadoop_debug "Appending HADOOP_JOURNALNODE_OPTS onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_JOURNALNODE_OPTS}" + ;; + jmxget) + HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.JMXGet + ;; + lsSnapshottableDir) + HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.snapshot.LsSnapshottableDir + ;; + mover) + HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" + HADOOP_CLASSNAME=org.apache.hadoop.hdfs.server.mover.Mover + hadoop_debug "Appending HADOOP_MOVER_OPTS onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_MOVER_OPTS}" + ;; + namenode) + HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" + HADOOP_CLASSNAME='org.apache.hadoop.hdfs.server.namenode.NameNode' + hadoop_debug "Appending HADOOP_NAMENODE_OPTS onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NAMENODE_OPTS}" + hadoop_add_param HADOOP_OPTS hdfs.audit.logger "-Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER}" + ;; + nfs3) + HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" + if [[ -n "${HADOOP_PRIVILEGED_NFS_USER}" ]]; then + HADOOP_SUBCMD_SECURESERVICE="true" + HADOOP_SUBCMD_SECUREUSER="${HADOOP_PRIVILEGED_NFS_USER}" + + # backward compatibility + HADOOP_SECURE_PID_DIR="${HADOOP_SECURE_PID_DIR:-$HADOOP_SECURE_NFS3_PID_DIR}" + HADOOP_SECURE_LOG_DIR="${HADOOP_SECURE_LOG_DIR:-$HADOOP_SECURE_NFS3_LOG_DIR}" + + hadoop_debug "Appending HADOOP_NFS3_OPTS onto HADOOP_OPTS" + hadoop_debug "Appending HADOOP_NFS3_SECURE_EXTRA_OPTS onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NFS3_OPTS} ${HADOOP_NFS3_SECURE_EXTRA_OPTS}" + HADOOP_CLASSNAME=org.apache.hadoop.hdfs.nfs.nfs3.PrivilegedNfsGatewayStarter + else + hadoop_debug "Appending HADOOP_NFS3_OPTS
onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NFS3_OPTS}" + HADOOP_CLASSNAME=org.apache.hadoop.hdfs.nfs.nfs3.Nfs3 + fi + ;; + oev) + HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer + ;; + oiv) + HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewerPB + ;; + oiv_legacy) + HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer + ;; + portmap) + HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" + HADOOP_CLASSNAME=org.apache.hadoop.portmap.Portmap + hadoop_debug "Appending HADOOP_PORTMAP_OPTS onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_PORTMAP_OPTS}" + ;; + secondarynamenode) + HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" + HADOOP_CLASSNAME='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode' + hadoop_debug "Appending HADOOP_SECONDARYNAMENODE_OPTS onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_SECONDARYNAMENODE_OPTS}" + hadoop_add_param HADOOP_OPTS hdfs.audit.logger "-Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER}" + ;; + snapshotDiff) + HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.snapshot.SnapshotDiff + ;; + storagepolicies) + HADOOP_CLASSNAME=org.apache.hadoop.hdfs.tools.StoragePolicyAdmin + ;; + version) + HADOOP_CLASSNAME=org.apache.hadoop.util.VersionInfo + ;; + zkfc) + HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" + HADOOP_CLASSNAME='org.apache.hadoop.hdfs.tools.DFSZKFailoverController' + hadoop_debug "Appending HADOOP_ZKFC_OPTS onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_ZKFC_OPTS}" + ;; + *) + HADOOP_CLASSNAME="${subcmd}" + if ! hadoop_validate_classname "${HADOOP_CLASSNAME}"; then + hadoop_exit_with_usage 1 + fi + ;; + esac } # let's locate libexec... 
@@ -81,232 +273,76 @@ if [[ $# = 0 ]]; then hadoop_exit_with_usage 1 fi -COMMAND=$1 +HADOOP_SUBCMD=$1 shift -case ${COMMAND} in - balancer) - supportdaemonization="true" - CLASS=org.apache.hadoop.hdfs.server.balancer.Balancer - hadoop_debug "Appending HADOOP_BALANCER_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_BALANCER_OPTS}" - ;; - cacheadmin) - CLASS=org.apache.hadoop.hdfs.tools.CacheAdmin - ;; - classpath) - hadoop_do_classpath_subcommand CLASS "$@" - ;; - crypto) - CLASS=org.apache.hadoop.hdfs.tools.CryptoAdmin - ;; - datanode) - supportdaemonization="true" - # Determine if we're starting a secure datanode, and - # if so, redefine appropriate variables - if [[ -n "${HADOOP_SECURE_DN_USER}" ]]; then - secure_service="true" - secure_user="${HADOOP_SECURE_DN_USER}" +HADOOP_SUBCMD_ARGS=("$@") - # backward compatiblity - HADOOP_SECURE_PID_DIR="${HADOOP_SECURE_PID_DIR:-$HADOOP_SECURE_DN_PID_DIR}" - HADOOP_SECURE_LOG_DIR="${HADOOP_SECURE_LOG_DIR:-$HADOOP_SECURE_DN_LOG_DIR}" +if declare -f hdfs_subcommand_"${HADOOP_SUBCMD}" >/dev/null 2>&1; then + hadoop_debug "Calling dynamically: hdfs_subcommand_${HADOOP_SUBCMD} ${HADOOP_SUBCMD_ARGS[*]}" + "hdfs_subcommand_${HADOOP_SUBCMD}" "${HADOOP_SUBCMD_ARGS[@]}" +else + hdfscmd_case "${HADOOP_SUBCMD}" "${HADOOP_SUBCMD_ARGS[@]}" +fi - hadoop_debug "Appending HADOOP_DATANODE_OPTS onto HADOOP_OPTS" - hadoop_debug "Appending HADOOP_DN_SECURE_EXTRA_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_DATANODE_OPTS} ${HADOOP_DN_SECURE_EXTRA_OPTS}" - CLASS="org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter" - else - hadoop_debug "Appending HADOOP_DATANODE_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_DATANODE_OPTS}" - CLASS='org.apache.hadoop.hdfs.server.datanode.DataNode' - fi - ;; - debug) - CLASS='org.apache.hadoop.hdfs.tools.DebugAdmin' - ;; - dfs) - CLASS=org.apache.hadoop.fs.FsShell - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} 
${HADOOP_CLIENT_OPTS}" - ;; - dfsadmin) - CLASS=org.apache.hadoop.hdfs.tools.DFSAdmin - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" - ;; - envvars) - echo "JAVA_HOME='${JAVA_HOME}'" - echo "HADOOP_HDFS_HOME='${HADOOP_HDFS_HOME}'" - echo "HDFS_DIR='${HDFS_DIR}'" - echo "HDFS_LIB_JARS_DIR='${HDFS_LIB_JARS_DIR}'" - echo "HADOOP_CONF_DIR='${HADOOP_CONF_DIR}'" - echo "HADOOP_TOOLS_HOME='${HADOOP_TOOLS_HOME}'" - echo "HADOOP_TOOLS_DIR='${HADOOP_TOOLS_DIR}'" - echo "HADOOP_TOOLS_LIB_JARS_DIR='${HADOOP_TOOLS_LIB_JARS_DIR}'" - exit 0 - ;; - erasurecode) - CLASS=org.apache.hadoop.hdfs.tools.erasurecode.ECCli - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" - ;; - fetchdt) - CLASS=org.apache.hadoop.hdfs.tools.DelegationTokenFetcher - ;; - fsck) - CLASS=org.apache.hadoop.hdfs.tools.DFSck - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" - ;; - getconf) - CLASS=org.apache.hadoop.hdfs.tools.GetConf - ;; - groups) - CLASS=org.apache.hadoop.hdfs.tools.GetGroups - ;; - haadmin) - CLASS=org.apache.hadoop.hdfs.tools.DFSHAAdmin - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" - ;; - journalnode) - supportdaemonization="true" - CLASS='org.apache.hadoop.hdfs.qjournal.server.JournalNode' - hadoop_debug "Appending HADOOP_JOURNALNODE_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_JOURNALNODE_OPTS}" - ;; - jmxget) - CLASS=org.apache.hadoop.hdfs.tools.JMXGet - ;; - lsSnapshottableDir) - CLASS=org.apache.hadoop.hdfs.tools.snapshot.LsSnapshottableDir - ;; - mover) - supportdaemonization="true" - CLASS=org.apache.hadoop.hdfs.server.mover.Mover - hadoop_debug "Appending HADOOP_MOVER_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_MOVER_OPTS}" - ;; - namenode) - supportdaemonization="true" - 
CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode' - hadoop_debug "Appending HADOOP_NAMENODE_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NAMENODE_OPTS}" - hadoop_add_param HADOOP_OPTS hdfs.audit.logger "-Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER}" - ;; - nfs3) - supportdaemonization="true" - if [[ -n "${HADOOP_PRIVILEGED_NFS_USER}" ]]; then - secure_service="true" - secure_user="${HADOOP_PRIVILEGED_NFS_USER}" - - # backward compatiblity - HADOOP_SECURE_PID_DIR="${HADOOP_SECURE_PID_DIR:-$HADOOP_SECURE_NFS3_PID_DIR}" - HADOOP_SECURE_LOG_DIR="${HADOOP_SECURE_LOG_DIR:-$HADOOP_SECURE_NFS3_LOG_DIR}" - - hadoop_debug "Appending HADOOP_NFS3_OPTS onto HADOOP_OPTS" - hadoop_debug "Appending HADOOP_NFS3_SECURE_EXTRA_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NFS3_OPTS} ${HADOOP_NFS3_SECURE_EXTRA_OPTS}" - CLASS=org.apache.hadoop.hdfs.nfs.nfs3.PrivilegedNfsGatewayStarter - else - hadoop_debug "Appending HADOOP_NFS3_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NFS3_OPTS}" - CLASS=org.apache.hadoop.hdfs.nfs.nfs3.Nfs3 - fi - ;; - oev) - CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer - ;; - oiv) - CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewerPB - ;; - oiv_legacy) - CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer - ;; - portmap) - supportdaemonization="true" - CLASS=org.apache.hadoop.portmap.Portmap - hadoop_debug "Appending HADOOP_PORTMAP_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_PORTMAP_OPTS}" - ;; - secondarynamenode) - supportdaemonization="true" - CLASS='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode' - hadoop_debug "Appending HADOOP_SECONDARYNAMENODE_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_SECONDARYNAMENODE_OPTS}" - hadoop_add_param HADOOP_OPTS hdfs.audit.logger "-Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER}" - ;; - snapshotDiff) - CLASS=org.apache.hadoop.hdfs.tools.snapshot.SnapshotDiff - ;; - 
storagepolicies) - CLASS=org.apache.hadoop.hdfs.tools.StoragePolicyAdmin - ;; - version) - CLASS=org.apache.hadoop.util.VersionInfo - ;; - zkfc) - supportdaemonization="true" - CLASS='org.apache.hadoop.hdfs.tools.DFSZKFailoverController' - hadoop_debug "Appending HADOOP_ZKFC_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_ZKFC_OPTS}" - ;; - *) - CLASS="${COMMAND}" - if ! hadoop_validate_classname "${CLASS}"; then - hadoop_exit_with_usage 1 - fi - ;; -esac - -hadoop_verify_user "${COMMAND}" +hadoop_verify_user "${HADOOP_SUBCMD}" if [[ ${HADOOP_SLAVE_MODE} = true ]]; then hadoop_common_slave_mode_execute "${HADOOP_HDFS_HOME}/bin/hdfs" "${HADOOP_USER_PARAMS[@]}" exit $? fi -if [[ -n "${secure_service}" ]]; then - HADOOP_SECURE_USER="${secure_user}" +if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then + HADOOP_SECURE_USER="${HADOOP_SUBCMD_SECUREUSER}" hadoop_verify_secure_prereq hadoop_setup_secure_service - priv_outfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out" - priv_errfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.err" - priv_pidfile="${HADOOP_PID_DIR}/privileged-${HADOOP_IDENT_STRING}-${COMMAND}.pid" - daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out" - daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${COMMAND}.pid" + priv_outfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out" + priv_errfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.err" + priv_pidfile="${HADOOP_PID_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid" + daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out" + daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid" else - 
daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out" - daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}.pid" + daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out" + daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid" fi if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then # shellcheck disable=SC2034 HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}" - if [[ -n "${secure_service}" ]]; then + if [[ -n "${HADOOP_SUBCMD_SECURESERVICE}" ]]; then # shellcheck disable=SC2034 - HADOOP_LOGFILE="hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log" + HADOOP_LOGFILE="hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.log" else # shellcheck disable=SC2034 - HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log" + HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.log" fi fi hadoop_finalize -if [[ -n "${supportdaemonization}" ]]; then - if [[ -n "${secure_service}" ]]; then +if [[ "${HADOOP_SUBCMD_SUPPORTDAEMONIZATION}" = true ]]; then + if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then hadoop_secure_daemon_handler \ - "${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}"\ - "${daemon_pidfile}" "${daemon_outfile}" \ - "${priv_pidfile}" "${priv_outfile}" "${priv_errfile}" "$@" + "${HADOOP_DAEMON_MODE}" \ + "${HADOOP_SUBCMD}" \ + "${HADOOP_CLASSNAME}" \ + "${daemon_pidfile}" \ + "${daemon_outfile}" \ + "${priv_pidfile}" \ + "${priv_outfile}" \ + "${priv_errfile}" \ + "${HADOOP_SUBCMD_ARGS[@]}" else - hadoop_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}"\ - "${daemon_pidfile}" "${daemon_outfile}" "$@" + hadoop_daemon_handler \ + "${HADOOP_DAEMON_MODE}" \ + "${HADOOP_SUBCMD}" \ + "${HADOOP_CLASSNAME}" \ + "${daemon_pidfile}" \ + "${daemon_outfile}" \ + "${HADOOP_SUBCMD_ARGS[@]}" fi exit $? 
else # shellcheck disable=SC2086 - hadoop_java_exec "${COMMAND}" "${CLASS}" "$@" + hadoop_java_exec "${HADOOP_SUBCMD}" "${HADOOP_CLASSNAME}" "${HADOOP_SUBCMD_ARGS[@]}" fi diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/shellprofile.d/hdfs.sh b/hadoop-hdfs-project/hadoop-hdfs/src/main/shellprofile.d/hadoop-hdfs.sh similarity index 98% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/shellprofile.d/hdfs.sh rename to hadoop-hdfs-project/hadoop-hdfs/src/main/shellprofile.d/hadoop-hdfs.sh index 5eb9e48264..398f8a0152 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/shellprofile.d/hdfs.sh +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/shellprofile.d/hadoop-hdfs.sh @@ -1,4 +1,4 @@ - +#!/usr/bin/env bash # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/scripts/hdfs-functions_test_helper.bash b/hadoop-hdfs-project/hadoop-hdfs/src/test/scripts/hdfs-functions_test_helper.bash new file mode 100755 index 0000000000..d3cdda4c1b --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/scripts/hdfs-functions_test_helper.bash @@ -0,0 +1,58 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +setup() { + + TMP="${BATS_TEST_DIRNAME}/../../../target/test-dir/bats.$$.${RANDOM}" + mkdir -p "${TMP}" + TMP=$(cd -P -- "${TMP}" >/dev/null && pwd -P) + export TMP + TESTBINDIR="${BATS_TEST_DIRNAME}" + HADOOP_LIBEXEC_DIR=${TESTBINDIR}/../../main/bin + HADOOP_LIBEXEC_DIR=$(cd -P -- "${HADOOP_LIBEXEC_DIR}" >/dev/null && pwd -P) + + # shellcheck disable=SC2034 + HADOOP_SHELL_SCRIPT_DEBUG=true + unset HADOOP_CONF_DIR + # we unset both of these for bw compat + unset HADOOP_HOME + unset HADOOP_PREFIX + + echo "bindir: ${TESTBINDIR}" 2>&1 + + mkdir -p "${TMP}" + + # shellcheck disable=SC2034 + QATESTMODE=true + + # shellcheck disable=SC1090 + . "${BATS_TEST_DIRNAME}/../../../../../hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh" + pushd "${TMP}" >/dev/null +} + +teardown() { + popd >/dev/null + rm -rf "${TMP}" +} + + +strstr() { + if [ "${1#*$2}" != "${1}" ]; then + echo true + else + echo false + fi +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/scripts/hdfs_subcommands.bats b/hadoop-hdfs-project/hadoop-hdfs/src/test/scripts/hdfs_subcommands.bats new file mode 100755 index 0000000000..d074ab9815 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/scripts/hdfs_subcommands.bats @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load hdfs-functions_test_helper + +# the loading of shell profiles is tested elsewhere +# this only tests the specific subcommand parts + +subcommandsetup () { + export HADOOP_LIBEXEC_DIR="${TMP}/libexec" + export HADOOP_CONF_DIR="${TMP}/conf" + mkdir -p "${HADOOP_LIBEXEC_DIR}" + echo ". \"${BATS_TEST_DIRNAME}/../../../../../hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh\"" > "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" + cat <<-'TOKEN' >> "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" + +hdfs_subcommand_sub () { + echo "unittest" + exit 0 +} + +hdfs_subcommand_cacheadmin () +{ + echo cacheadmin + exit 0 +} + +hdfs_subcommand_envcheck () +{ + echo ${HADOOP_SHELL_EXECNAME} + exit 0 +} + +hdfs_subcommand_multi () +{ + echo $2 + exit 0 +} +TOKEN + chmod a+rx "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" +} + +@test "hdfs_subcommand (addition)" { + subcommandsetup + run "${BATS_TEST_DIRNAME}/../../main/bin/hdfs" sub + echo ">${output}<" + [ "${output}" = unittest ] +} + +@test "hdfs_subcommand (substitute)" { + subcommandsetup + run "${BATS_TEST_DIRNAME}/../../main/bin/hdfs" cacheadmin + echo ">${output}<" + [ "${output}" = cacheadmin ] +} + +@test "hdfs_subcommand (envcheck)" { + subcommandsetup + run "${BATS_TEST_DIRNAME}/../../main/bin/hdfs" envcheck + [ "${output}" = hdfs ] +} + +@test "hdfs_subcommand (multiparams)" { + subcommandsetup + run "${BATS_TEST_DIRNAME}/../../main/bin/hdfs" multi 1 2 + [ "${output}" = 2 ] +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/scripts/run-bats.sh b/hadoop-hdfs-project/hadoop-hdfs/src/test/scripts/run-bats.sh
new file mode 100755 index 0000000000..566f47a211 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/scripts/run-bats.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +targetdir=../../../target +mkdir -p ${targetdir}/surefire-reports ${targetdir}/tap + +batsexe=$(which bats) 2>/dev/null + +if [[ -z ${batsexe} ]]; then + echo "not ok - no bats executable found" > "${targetdir}/tap/shelltest.tap" + echo "" + echo "" + echo "ERROR: bats not installed. Skipping bash tests." + echo "ERROR: Please install bats as soon as possible." + echo "" + echo "" + exit 0 +fi + +for j in *.bats; do + echo Running bats -t "${j}" + bats -t "${j}" 2>&1 | tee "${targetdir}/tap/${j}.tap" + result=${PIPESTATUS[0]} + ((exitcode=exitcode+result)) +done + +if [[ ${exitcode} -gt 0 ]]; then + exit 1 +fi +exit 0 diff --git a/hadoop-mapreduce-project/bin/mapred b/hadoop-mapreduce-project/bin/mapred index f280f311f2..046d48c4cf 100755 --- a/hadoop-mapreduce-project/bin/mapred +++ b/hadoop-mapreduce-project/bin/mapred @@ -16,13 +16,15 @@ # limitations under the License. MYNAME="${BASH_SOURCE-$0}" +HADOOP_SHELL_EXECNAME="${MYNAME##*/}" +## @description build up the mapred command's usage text. 
+## @audience public +## @stability stable +## @replaceable no function hadoop_usage { - hadoop_add_subcommand "archive" "create a hadoop archive" - hadoop_add_subcommand "archive-logs" "combine aggregated logs into hadoop archives" hadoop_add_subcommand "classpath" "prints the class path needed for running mapreduce subcommands" - hadoop_add_subcommand "distcp" "copy file or directories recursively" hadoop_add_subcommand "envvars" "display computed Hadoop environment variables" hadoop_add_subcommand "historyserver" "run job history servers as a standalone daemon" hadoop_add_subcommand "hsadmin" "job history server admin interface" @@ -31,7 +33,85 @@ function hadoop_usage hadoop_add_subcommand "queue" "get information regarding JobQueues" hadoop_add_subcommand "sampler" "sampler" hadoop_add_subcommand "version" "print the version" - hadoop_generate_usage "${MYNAME}" true + hadoop_generate_usage "${HADOOP_SHELL_EXECNAME}" true +} + +## @description Default command handler for mapred command +## @audience public +## @stability stable +## @replaceable no +## @param CLI arguments +function mapredcmd_case +{ + subcmd=$1 + shift + + case ${subcmd} in + mradmin|jobtracker|tasktracker|groups) + hadoop_error "Sorry, the ${subcmd} command is no longer supported." + hadoop_error "You may find similar functionality with the \"yarn\" shell command."
+ hadoop_exit_with_usage 1 + ;; + classpath) + hadoop_do_classpath_subcommand HADOOP_CLASSNAME "$@" + ;; + envvars) + echo "JAVA_HOME='${JAVA_HOME}'" + echo "HADOOP_MAPRED_HOME='${HADOOP_MAPRED_HOME}'" + echo "MAPRED_DIR='${MAPRED_DIR}'" + echo "MAPRED_LIB_JARS_DIR='${MAPRED_LIB_JARS_DIR}'" + echo "HADOOP_CONF_DIR='${HADOOP_CONF_DIR}'" + echo "HADOOP_TOOLS_HOME='${HADOOP_TOOLS_HOME}'" + echo "HADOOP_TOOLS_DIR='${HADOOP_TOOLS_DIR}'" + echo "HADOOP_TOOLS_LIB_JARS_DIR='${HADOOP_TOOLS_LIB_JARS_DIR}'" + exit 0 + ;; + historyserver) + HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true" + HADOOP_CLASSNAME=org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer + hadoop_debug "Appending HADOOP_JOB_HISTORYSERVER_OPTS onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_JOB_HISTORYSERVER_OPTS}" + if [ -n "${HADOOP_JOB_HISTORYSERVER_HEAPSIZE}" ]; then + # shellcheck disable=SC2034 + HADOOP_HEAPSIZE_MAX="${HADOOP_JOB_HISTORYSERVER_HEAPSIZE}" + fi + HADOOP_DAEMON_ROOT_LOGGER=${HADOOP_JHS_LOGGER:-$HADOOP_DAEMON_ROOT_LOGGER} + ;; + hsadmin) + HADOOP_CLASSNAME=org.apache.hadoop.mapreduce.v2.hs.client.HSAdmin + hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" + ;; + job) + HADOOP_CLASSNAME=org.apache.hadoop.mapred.JobClient + hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" + ;; + pipes) + HADOOP_CLASSNAME=org.apache.hadoop.mapred.pipes.Submitter + hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" + ;; + queue) + HADOOP_CLASSNAME=org.apache.hadoop.mapred.JobQueueClient + ;; + sampler) + HADOOP_CLASSNAME=org.apache.hadoop.mapred.lib.InputSampler + hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" + ;; + version) + HADOOP_CLASSNAME=org.apache.hadoop.util.VersionInfo + hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" + 
HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" + ;; + *) + HADOOP_CLASSNAME="${subcmd}" + if ! hadoop_validate_classname "${HADOOP_CLASSNAME}"; then + hadoop_exit_with_usage 1 + fi + ;; + esac } bin=$(cd -P -- "$(dirname -- "${MYNAME}")" >/dev/null && pwd -P) @@ -58,125 +138,71 @@ if [ $# = 0 ]; then hadoop_exit_with_usage 1 fi -COMMAND=$1 +HADOOP_SUBCMD=$1 shift -case ${COMMAND} in - mradmin|jobtracker|tasktracker|groups) - hadoop_error "Sorry, the ${COMMAND} command is no longer supported." - hadoop_error "You may find similar functionality with the \"yarn\" shell command." - hadoop_exit_with_usage 1 - ;; - archive) - CLASS=org.apache.hadoop.tools.HadoopArchives - hadoop_add_to_classpath_tools hadoop-archives - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" - ;; - archive-logs) - CLASS=org.apache.hadoop.tools.HadoopArchiveLogs - hadoop_add_to_classpath_tools hadoop-archive-logs - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" - ;; - classpath) - hadoop_do_classpath_subcommand CLASS "$@" - ;; - distcp) - CLASS=org.apache.hadoop.tools.DistCp - hadoop_add_to_classpath_tools hadoop-distcp - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" - ;; - envvars) - echo "JAVA_HOME='${JAVA_HOME}'" - echo "HADOOP_MAPRED_HOME='${HADOOP_MAPRED_HOME}'" - echo "MAPRED_DIR='${MAPRED_DIR}'" - echo "MAPRED_LIB_JARS_DIR='${MAPRED_LIB_JARS_DIR}'" - echo "HADOOP_CONF_DIR='${HADOOP_CONF_DIR}'" - echo "HADOOP_TOOLS_HOME='${HADOOP_TOOLS_HOME}'" - echo "HADOOP_TOOLS_DIR='${HADOOP_TOOLS_DIR}'" - echo "HADOOP_TOOLS_LIB_JARS_DIR='${HADOOP_TOOLS_LIB_JARS_DIR}'" - exit 0 - ;; - historyserver) - supportdaemonization="true" - CLASS=org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer - hadoop_debug "Appending HADOOP_JOB_HISTORYSERVER_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} 
${HADOOP_JOB_HISTORYSERVER_OPTS}" - if [ -n "${HADOOP_JOB_HISTORYSERVER_HEAPSIZE}" ]; then - HADOOP_HEAPSIZE_MAX="${HADOOP_JOB_HISTORYSERVER_HEAPSIZE}" - fi - HADOOP_DAEMON_ROOT_LOGGER=${HADOOP_JHS_LOGGER:-$HADOOP_DAEMON_ROOT_LOGGER} - ;; - hsadmin) - CLASS=org.apache.hadoop.mapreduce.v2.hs.client.HSAdmin - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" - ;; - job) - CLASS=org.apache.hadoop.mapred.JobClient - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" - ;; - pipes) - CLASS=org.apache.hadoop.mapred.pipes.Submitter - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" - ;; - queue) - CLASS=org.apache.hadoop.mapred.JobQueueClient - ;; - sampler) - CLASS=org.apache.hadoop.mapred.lib.InputSampler - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" - ;; - version) - CLASS=org.apache.hadoop.util.VersionInfo - hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}" - ;; - *) - CLASS="${COMMAND}" - if ! hadoop_validate_classname "${CLASS}"; then - hadoop_exit_with_usage 1 - fi - ;; -esac +HADOOP_SUBCMD_ARGS=("$@") -hadoop_verify_user "${COMMAND}" +if declare -f mapred_subcommand_"${HADOOP_SUBCMD}" >/dev/null 2>&1; then + hadoop_debug "Calling dynamically: mapred_subcommand_${HADOOP_SUBCMD} ${HADOOP_SUBCMD_ARGS[*]}" + "mapred_subcommand_${HADOOP_SUBCMD}" "${HADOOP_SUBCMD_ARGS[@]}" +else + mapredcmd_case "${HADOOP_SUBCMD}" "${HADOOP_SUBCMD_ARGS[@]}" +fi + +hadoop_verify_user "${HADOOP_SUBCMD}" if [[ ${HADOOP_SLAVE_MODE} = true ]]; then hadoop_common_slave_mode_execute "${HADOOP_MAPRED_HOME}/bin/mapred" "${HADOOP_USER_PARAMS[@]}" exit $? 
fi -daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out" -daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}.pid" - +if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then + HADOOP_SECURE_USER="${HADOOP_SUBCMD_SECUREUSER}" + hadoop_verify_secure_prereq + hadoop_setup_secure_service + priv_outfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out" + priv_errfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.err" + priv_pidfile="${HADOOP_PID_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid" + daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out" + daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid" +else + daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out" + daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid" +fi if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then # shellcheck disable=SC2034 HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}" hadoop_add_param HADOOP_OPTS mapred.jobsummary.logger "-Dmapred.jobsummary.logger=${HADOOP_ROOT_LOGGER}" # shellcheck disable=SC2034 - HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log" + HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.log" fi hadoop_finalize -if [[ -n "${supportdaemonization}" ]]; then - if [[ -n "${secure_service}" ]]; then - hadoop_secure_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}"\ - "${CLASS}" "${daemon_pidfile}" "${daemon_outfile}" \ - "${priv_pidfile}" "${priv_outfile}" "${priv_errfile}" "$@" +if [[ "${HADOOP_SUBCMD_SUPPORTDAEMONIZATION}" = true ]]; then + if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then + hadoop_secure_daemon_handler \ + "${HADOOP_DAEMON_MODE}" \ + "${HADOOP_SUBCMD}" \ + 
"${HADOOP_CLASSNAME}" \ + "${daemon_pidfile}" \ + "${daemon_outfile}" \ + "${priv_pidfile}" \ + "${priv_outfile}" \ + "${priv_errfile}" \ + "${HADOOP_SUBCMD_ARGS[@]}" else - hadoop_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}" \ - "${daemon_pidfile}" "${daemon_outfile}" "$@" + hadoop_daemon_handler \ + "${HADOOP_DAEMON_MODE}" \ + "${HADOOP_SUBCMD}" \ + "${HADOOP_CLASSNAME}" \ + "${daemon_pidfile}" \ + "${daemon_outfile}" \ + "${HADOOP_SUBCMD_ARGS[@]}" fi exit $? else - hadoop_java_exec "${COMMAND}" "${CLASS}" "$@" + hadoop_java_exec "${HADOOP_SUBCMD}" "${HADOOP_CLASSNAME}" "${HADOOP_SUBCMD_ARGS[@]}" fi - diff --git a/hadoop-mapreduce-project/shellprofile.d/mapreduce.sh b/hadoop-mapreduce-project/shellprofile.d/hadoop-mapreduce.sh similarity index 98% rename from hadoop-mapreduce-project/shellprofile.d/mapreduce.sh rename to hadoop-mapreduce-project/shellprofile.d/hadoop-mapreduce.sh index 0b3dab13a9..b0e3adbb20 100644 --- a/hadoop-mapreduce-project/shellprofile.d/mapreduce.sh +++ b/hadoop-mapreduce-project/shellprofile.d/hadoop-mapreduce.sh @@ -1,4 +1,4 @@ - +#!/usr/bin/env bash # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. diff --git a/hadoop-tools/hadoop-archive-logs/src/main/shellprofile.d/hadoop-archive-logs.sh b/hadoop-tools/hadoop-archive-logs/src/main/shellprofile.d/hadoop-archive-logs.sh new file mode 100755 index 0000000000..ae7b6c67d1 --- /dev/null +++ b/hadoop-tools/hadoop-archive-logs/src/main/shellprofile.d/hadoop-archive-logs.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
# Shell profile for the hadoop-archive-logs tool.
# The guard skips everything when a function with this name is already
# defined, so an earlier definition takes precedence over this one.
if ! declare -F mapred_subcommand_archive-logs >/dev/null 2>&1; then

  # advertise the subcommand only in the usage output of `mapred`
  if [[ "${HADOOP_SHELL_EXECNAME}" == "mapred" ]]; then
    hadoop_add_subcommand "archive-logs" "combine aggregated logs into hadoop archives"
  fi

  # this can't be indented otherwise shelldocs won't get it

## @description  archive-logs command for mapred
## @audience     public
## @stability    stable
## @replaceable  yes
function mapred_subcommand_archive-logs
{
  # class the launcher will run for this subcommand
  # shellcheck disable=SC2034
  HADOOP_CLASSNAME=org.apache.hadoop.tools.HadoopArchiveLogs

  # pull the tool's jars onto the classpath
  hadoop_add_to_classpath_tools hadoop-archive-logs

  # this is a client-side tool, so it gets the client JVM options
  hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
  HADOOP_OPTS+=" ${HADOOP_CLIENT_OPTS}"
}

fi
# Shell profile for the hadoop-archives tool (hadoop front end).
# Nothing is (re)defined when a function of this name already exists.
if ! declare -F hadoop_subcommand_archive >/dev/null 2>&1; then

  # advertise the subcommand only in the usage output of `hadoop`
  if [[ "${HADOOP_SHELL_EXECNAME}" == "hadoop" ]]; then
    hadoop_add_subcommand "archive" "create a Hadoop archive"
  fi

  # this can't be indented otherwise shelldocs won't get it

## @description  archive command for hadoop
## @audience     public
## @stability    stable
## @replaceable  yes
function hadoop_subcommand_archive
{
  # class the launcher will run for this subcommand
  # shellcheck disable=SC2034
  HADOOP_CLASSNAME=org.apache.hadoop.tools.HadoopArchives

  # pull the tool's jars onto the classpath
  hadoop_add_to_classpath_tools hadoop-archives
}

fi
# Shell profile for the hadoop-archives tool (mapred front end).
# Nothing is (re)defined when a function of this name already exists.
if ! declare -F mapred_subcommand_archive >/dev/null 2>&1; then

  # advertise the subcommand only in the usage output of `mapred`
  if [[ "${HADOOP_SHELL_EXECNAME}" == "mapred" ]]; then
    hadoop_add_subcommand "archive" "create a Hadoop archive"
  fi

  # this can't be indented otherwise shelldocs won't get it

## @description  archive command for mapred (same setup as the hadoop one)
## @audience     public
## @stability    stable
## @replaceable  yes
function mapred_subcommand_archive
{
  # class the launcher will run for this subcommand
  # shellcheck disable=SC2034
  HADOOP_CLASSNAME=org.apache.hadoop.tools.HadoopArchives

  # pull the tool's jars onto the classpath
  hadoop_add_to_classpath_tools hadoop-archives
}

fi
# Shell profile for the hadoop-distcp tool.  Two entry points are provided,
# one per front end (`hadoop distcp` and `mapred distcp`); each is skipped
# when a function with the same name has already been defined.
if ! declare -F hadoop_subcommand_distcp >/dev/null 2>&1; then

  # advertise the subcommand only in the usage output of `hadoop`
  if [[ "${HADOOP_SHELL_EXECNAME}" == "hadoop" ]]; then
    hadoop_add_subcommand "distcp" "copy file or directories recursively"
  fi

  # this can't be indented otherwise shelldocs won't get it

## @description  distcp command for hadoop
## @audience     public
## @stability    stable
## @replaceable  yes
function hadoop_subcommand_distcp
{
  # class the launcher will run for this subcommand
  # shellcheck disable=SC2034
  HADOOP_CLASSNAME=org.apache.hadoop.tools.DistCp

  # pull the tool's jars onto the classpath
  hadoop_add_to_classpath_tools hadoop-distcp

  # client-side tool: hand it the client JVM options
  hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
  HADOOP_OPTS+=" ${HADOOP_CLIENT_OPTS}"
}

fi

if ! declare -F mapred_subcommand_distcp >/dev/null 2>&1; then

  # advertise the subcommand only in the usage output of `mapred`
  if [[ "${HADOOP_SHELL_EXECNAME}" == "mapred" ]]; then
    hadoop_add_subcommand "distcp" "copy file or directories recursively"
  fi

  # this can't be indented otherwise shelldocs won't get it

## @description  distcp command for mapred (same setup as the hadoop one)
## @audience     public
## @stability    stable
## @replaceable  yes
function mapred_subcommand_distcp
{
  # class the launcher will run for this subcommand
  # shellcheck disable=SC2034
  HADOOP_CLASSNAME=org.apache.hadoop.tools.DistCp

  # pull the tool's jars onto the classpath
  hadoop_add_to_classpath_tools hadoop-distcp

  # client-side tool: hand it the client JVM options
  hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
  HADOOP_OPTS+=" ${HADOOP_CLIENT_OPTS}"
}

fi
# Shell profile for the hadoop-extras tools (currently only `distch`).
# Nothing is (re)defined when a function of this name already exists.
if ! declare -F hadoop_subcommand_distch >/dev/null 2>&1; then

  # advertise the subcommand only in the usage output of `hadoop`
  if [[ "${HADOOP_SHELL_EXECNAME}" == "hadoop" ]]; then
    hadoop_add_subcommand "distch" "distributed metadata changer"
  fi

  # this can't be indented otherwise shelldocs won't get it

## @description  distch command for hadoop
## @audience     public
## @stability    stable
## @replaceable  yes
function hadoop_subcommand_distch
{
  # class the launcher will run for this subcommand
  # shellcheck disable=SC2034
  HADOOP_CLASSNAME=org.apache.hadoop.tools.DistCh

  # pull the tool's jars onto the classpath
  hadoop_add_to_classpath_tools hadoop-extras

  # client-side tool: hand it the client JVM options
  hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
  HADOOP_OPTS+=" ${HADOOP_CLIENT_OPTS}"
}

fi
# Shell profile for the hadoop-gridmix tool.
# Nothing is (re)defined when a function of this name already exists.
if ! declare -F hadoop_subcommand_gridmix >/dev/null 2>&1; then

  # advertise the subcommand only in the usage output of `hadoop`
  if [[ "${HADOOP_SHELL_EXECNAME}" == "hadoop" ]]; then
    hadoop_add_subcommand "gridmix" "submit a mix of synthetic job, modeling a profiled from production load"
  fi

## @description  gridmix command for hadoop
## @audience     public
## @stability    stable
## @replaceable  yes
function hadoop_subcommand_gridmix
{
  # class the launcher will run for this subcommand
  # shellcheck disable=SC2034
  HADOOP_CLASSNAME=org.apache.hadoop.mapred.gridmix.Gridmix

  # both the rumen and the gridmix tool jars go onto the classpath,
  # rumen first
  hadoop_add_to_classpath_tools hadoop-rumen
  hadoop_add_to_classpath_tools hadoop-gridmix
}

fi
# Shell profile for the hadoop-rumen tool: `rumenfolder` entry point.
# Nothing is (re)defined when a function of this name already exists.
if ! declare -F hadoop_subcommand_rumenfolder >/dev/null 2>&1; then

  # advertise the subcommand only in the usage output of `hadoop`
  if [[ "${HADOOP_SHELL_EXECNAME}" == "hadoop" ]]; then
    hadoop_add_subcommand "rumenfolder" "scale a rumen input trace"
  fi

## @description  rumenfolder command for hadoop
## @audience     public
## @stability    stable
## @replaceable  yes
function hadoop_subcommand_rumenfolder
{
  # class the launcher will run for this subcommand
  # shellcheck disable=SC2034
  HADOOP_CLASSNAME=org.apache.hadoop.tools.rumen.Folder

  # pull the tool's jars onto the classpath
  hadoop_add_to_classpath_tools hadoop-rumen

  # client-side tool: hand it the client JVM options
  hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
  HADOOP_OPTS+=" ${HADOOP_CLIENT_OPTS}"
}

fi
# Shell profile for the hadoop-rumen tool: `rumentrace` entry point.
# Nothing is (re)defined when a function of this name already exists.
if ! declare -F hadoop_subcommand_rumentrace >/dev/null 2>&1; then

  # advertise the subcommand only in the usage output of `hadoop`
  if [[ "${HADOOP_SHELL_EXECNAME}" == "hadoop" ]]; then
    hadoop_add_subcommand "rumentrace" "convert logs into a rumen trace"
  fi

## @description  rumentrace command for hadoop
## @audience     public
## @stability    stable
## @replaceable  yes
function hadoop_subcommand_rumentrace
{
  # class the launcher will run for this subcommand
  # shellcheck disable=SC2034
  HADOOP_CLASSNAME=org.apache.hadoop.tools.rumen.TraceBuilder

  # pull the tool's jars onto the classpath
  hadoop_add_to_classpath_tools hadoop-rumen

  # client-side tool: hand it the client JVM options
  hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
  HADOOP_OPTS+=" ${HADOOP_CLIENT_OPTS}"
}

fi
@@ -205,12 +202,8 @@ $H4 Options $H4 Example -*Rumen* expects certain library *JARs* to be present in the *CLASSPATH*. -One simple way to run Rumen is to use -`$HADOOP_HOME/bin/hadoop jar` command to run it as example below. - ``` -java org.apache.hadoop.tools.rumen.TraceBuilder \ +hadoop rumentrace \ file:///tmp/job-trace.json \ file:///tmp/job-topology.json \ hdfs:///tmp/hadoop-yarn/staging/history/done_intermediate/testuser @@ -229,7 +222,7 @@ $H3 Folder $H4 Command ``` -java org.apache.hadoop.tools.rumen.Folder [options] [input] [output] +hadoop rumenfolder [options] [input] [output] ``` This command invokes the `Folder` utility of @@ -350,7 +343,7 @@ $H4 Examples $H5 Folding an input trace with 10 hours of total runtime to generate an output trace with 1 hour of total runtime ``` -java org.apache.hadoop.tools.rumen.Folder \ +hadoop rumenfolder \ -output-duration 1h \ -input-cycle 20m \ file:///tmp/job-trace.json \ @@ -362,7 +355,7 @@ If the folded jobs are out of order then the command will bail out. $H5 Folding an input trace with 10 hours of total runtime to generate an output trace with 1 hour of total runtime and tolerate some skewness ``` -java org.apache.hadoop.tools.rumen.Folder \ +hadoop rumenfolder \ -output-duration 1h \ -input-cycle 20m \ -allow-missorting \ @@ -378,7 +371,7 @@ If the folded jobs are out of order, then atmost $H5 Folding an input trace with 10 hours of total runtime to generate an output trace with 1 hour of total runtime in debug mode ``` -java org.apache.hadoop.tools.rumen.Folder \ +hadoop rumenfolder \ -output-duration 1h \ -input-cycle 20m \ -debug -temp-directory file:///tmp/debug \ @@ -395,7 +388,7 @@ up. $H5 Folding an input trace with 10 hours of total runtime to generate an output trace with 1 hour of total runtime with custom concentration. 
``` -java org.apache.hadoop.tools.rumen.Folder \ +hadoop rumenfolder \ -output-duration 1h \ -input-cycle 20m \ -concentration 2 \ @@ -421,18 +414,3 @@ Look at the MapReduce rumen-component for further details. - -$H3 Dependencies - -*Rumen* expects certain library *JARs* to be present in the *CLASSPATH*. -One simple way to run Rumen is to use -`hadoop jar` command to run it as example below. - -``` -$HADOOP_HOME/bin/hadoop jar \ - $HADOOP_HOME/share/hadoop/tools/lib/hadoop-rumen-2.5.1.jar \ - org.apache.hadoop.tools.rumen.TraceBuilder \ - file:///tmp/job-trace.json \ - file:///tmp/job-topology.json \ - hdfs:///tmp/hadoop-yarn/staging/history/done_intermediate/testuser -``` diff --git a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/DumpTypedBytes.java b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/DumpTypedBytes.java index 5a07cc325c..ffddc7cc11 100644 --- a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/DumpTypedBytes.java +++ b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/DumpTypedBytes.java @@ -91,8 +91,7 @@ public int run(String[] args) throws Exception { } private void printUsage() { - System.out.println("Usage: $HADOOP_HOME/bin/hadoop jar hadoop-streaming.jar" - + " dumptb "); + System.out.println("Usage: mapred streaming dumptb "); System.out.println(" Dumps all files that match the given pattern to " + "standard output as typed bytes."); System.out.println(" The files can be text or sequence files"); diff --git a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/HadoopStreaming.java b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/HadoopStreaming.java index eabf46c83c..92f9d03866 100644 --- a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/HadoopStreaming.java +++ b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/HadoopStreaming.java @@ -56,8 +56,7 @@ public 
static void main(String[] args) throws Exception { } private static void printUsage() { - System.out.println("Usage: $HADOOP_HOME/bin/hadoop jar hadoop-streaming.jar" - + " [options]"); + System.out.println("Usage: mapred streaming [options]"); System.out.println("Options:"); System.out.println(" dumptb Dumps all files that match the" + " given pattern to "); diff --git a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/LoadTypedBytes.java b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/LoadTypedBytes.java index a7a001cff6..838cfa1627 100644 --- a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/LoadTypedBytes.java +++ b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/LoadTypedBytes.java @@ -89,8 +89,7 @@ public int run(String[] args) throws Exception { } private void printUsage() { - System.out.println("Usage: $HADOOP_HOME/bin/hadoop jar hadoop-streaming.jar" - + " loadtb "); + System.out.println("Usage: mapred streaming loadtb "); System.out.println(" Reads typed bytes from standard input" + " and stores them in a sequence file in"); System.out.println(" the specified path"); diff --git a/hadoop-tools/hadoop-streaming/src/main/shellprofile.d/hadoop-streaming.sh b/hadoop-tools/hadoop-streaming/src/main/shellprofile.d/hadoop-streaming.sh new file mode 100755 index 0000000000..cca016d860 --- /dev/null +++ b/hadoop-tools/hadoop-streaming/src/main/shellprofile.d/hadoop-streaming.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
# Shell profile for the hadoop-streaming tool.
# Nothing is (re)defined when a function of this name already exists.
if ! declare -f mapred_subcommand_streaming >/dev/null 2>/dev/null; then

  # advertise the subcommand only in the usage output of `mapred`
  if [[ "${HADOOP_SHELL_EXECNAME}" = mapred ]]; then
    hadoop_add_subcommand "streaming" "launch a mapreduce streaming job"
  fi

## @description  streaming command for mapred
## @audience     public
## @stability    stable
## @replaceable  yes
function mapred_subcommand_streaming
{
  declare jarname
  declare oldifs
  declare ifs_was_set=false

  # streaming is launched through RunJar, which takes the jar to run as
  # its first argument
  # shellcheck disable=SC2034
  HADOOP_CLASSNAME=org.apache.hadoop.util.RunJar
  hadoop_add_to_classpath_tools hadoop-streaming

  # Remember whether IFS was set at all, not just its value: restoring an
  # originally-unset IFS via IFS="" would leave it *empty*, which turns
  # off word splitting for everything that runs after this function.
  if [[ -n "${IFS+set}" ]]; then
    ifs_was_set=true
    oldifs=${IFS}
  fi

  # locate the streaming jar so we have something to
  # give to RunJar
  IFS=:
  # CLASSPATH is deliberately unquoted so it is split on ':'
  # shellcheck disable=SC2086
  for jarname in ${CLASSPATH}; do
    if [[ "${jarname}" =~ hadoop-streaming-[0-9] ]]; then
      # first match wins; it is prepended to the user's arguments
      HADOOP_SUBCMD_ARGS=("${jarname}" "${HADOOP_SUBCMD_ARGS[@]}")
      break
    fi
  done

  # put IFS back exactly the way we found it
  if [[ "${ifs_was_set}" = true ]]; then
    IFS=${oldifs}
  else
    unset IFS
  fi

  hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
  HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"

}

fi
For example: - hadoop jar hadoop-streaming-${project.version}.jar \ + mapred streaming \ -input myInputDirs \ -output myOutputDir \ -mapper /bin/cat \ @@ -88,7 +88,7 @@ Streaming supports streaming command options as well as [generic command options **Note:** Be sure to place the generic options before the streaming options, otherwise the command will fail. For an example, see [Making Archives Available to Tasks](#Making_Archives_Available_to_Tasks). - hadoop command [genericOptions] [streamingOptions] + mapred streaming [genericOptions] [streamingOptions] The Hadoop streaming command options are listed here: @@ -115,7 +115,7 @@ $H3 Specifying a Java Class as the Mapper/Reducer You can supply a Java class as the mapper and/or the reducer. - hadoop jar hadoop-streaming-${project.version}.jar \ + mapred streaming \ -input myInputDirs \ -output myOutputDir \ -inputformat org.apache.hadoop.mapred.KeyValueTextInputFormat \ @@ -128,7 +128,7 @@ $H3 Packaging Files With Job Submissions You can specify any executable as the mapper and/or the reducer. The executables do not need to pre-exist on the machines in the cluster; however, if they don't, you will need to use "-file" option to tell the framework to pack your executable files as a part of job submission. For example: - hadoop jar hadoop-streaming-${project.version}.jar \ + mapred streaming \ -input myInputDirs \ -output myOutputDir \ -mapper myPythonScript.py \ @@ -139,7 +139,7 @@ The above example specifies a user defined Python executable as the mapper. The In addition to executable files, you can also package other auxiliary files (such as dictionaries, configuration files, etc) that may be used by the mapper and/or the reducer. 
For example: - hadoop jar hadoop-streaming-${project.version}.jar \ + mapred streaming \ -input myInputDirs \ -output myOutputDir \ -mapper myPythonScript.py \ @@ -216,7 +216,7 @@ $H4 Specifying the Number of Reducers To specify the number of reducers, for example two, use: - hadoop jar hadoop-streaming-${project.version}.jar \ + mapred streaming \ -D mapreduce.job.reduces=2 \ -input myInputDirs \ -output myOutputDir \ @@ -229,7 +229,7 @@ As noted earlier, when the Map/Reduce framework reads a line from the stdout of However, you can customize this default. You can specify a field separator other than the tab character (the default), and you can specify the nth (n \>= 1) character rather than the first character in a line (the default) as the separator between the key and value. For example: - hadoop jar hadoop-streaming-${project.version}.jar \ + mapred streaming \ -D stream.map.output.field.separator=. \ -D stream.num.map.output.key.fields=4 \ -input myInputDirs \ @@ -279,7 +279,7 @@ User can specify a different symlink name for -archives using \#. In this example, the input.txt file has two lines specifying the names of the two files: cachedir.jar/cache.txt and cachedir.jar/cache2.txt. "cachedir.jar" is a symlink to the archived directory, which has the files "cache.txt" and "cache2.txt". - hadoop jar hadoop-streaming-${project.version}.jar \ + mapred streaming \ -archives 'hdfs://hadoop-nn1.example.com/user/me/samples/cachefile/cachedir.jar' \ -D mapreduce.job.maps=1 \ -D mapreduce.job.reduces=1 \ @@ -325,7 +325,7 @@ $H3 Hadoop Partitioner Class Hadoop has a library class, [KeyFieldBasedPartitioner](../api/org/apache/hadoop/mapred/lib/KeyFieldBasedPartitioner.html), that is useful for many applications. This class allows the Map/Reduce framework to partition the map outputs based on certain key fields, not the whole keys. For example: - hadoop jar hadoop-streaming-${project.version}.jar \ + mapred streaming \ -D stream.map.output.field.separator=. 
\ -D stream.num.map.output.key.fields=4 \ -D map.output.key.field.separator=. \ @@ -375,7 +375,7 @@ $H3 Hadoop Comparator Class Hadoop has a library class, [KeyFieldBasedComparator](../api/org/apache/hadoop/mapreduce/lib/partition/KeyFieldBasedComparator.html), that is useful for many applications. This class provides a subset of features provided by the Unix/GNU Sort. For example: - hadoop jar hadoop-streaming-${project.version}.jar \ + mapred streaming \ -D mapreduce.job.output.key.comparator.class=org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedComparator \ -D stream.map.output.field.separator=. \ -D stream.num.map.output.key.fields=4 \ @@ -411,7 +411,7 @@ Hadoop has a library package called [Aggregate](../api/org/apache/hadoop/mapred/ To use Aggregate, simply specify "-reducer aggregate": - hadoop jar hadoop-streaming-${project.version}.jar \ + mapred streaming \ -input myInputDirs \ -output myOutputDir \ -mapper myAggregatorForKeyCount.py \ @@ -444,7 +444,7 @@ $H3 Hadoop Field Selection Class Hadoop has a library class, [FieldSelectionMapReduce](../api/org/apache/hadoop/mapred/lib/FieldSelectionMapReduce.html), that effectively allows you to process text data like the unix "cut" utility. The map function defined in the class treats each input key/value pair as a list of fields. You can specify the field separator (the default is the tab character). You can select an arbitrary list of fields as the map output key, and an arbitrary list of fields as the map output value. Similarly, the reduce function defined in the class treats each input key/value pair as a list of fields. You can select an arbitrary list of fields as the reduce output key, and an arbitrary list of fields as the reduce output value. For example: - hadoop jar hadoop-streaming-${project.version}.jar \ + mapred streaming \ -D mapreduce.map.output.key.field.separator=. \ -D mapreduce.partition.keypartitioner.options=-k1,2 \ -D mapreduce.fieldsel.data.field.separator=. 
\ @@ -495,7 +495,7 @@ Using an alias will not work, but variable substitution is allowed as shown in t charlie 80 dan 75 - $ c2='cut -f2'; hadoop jar hadoop-streaming-${project.version}.jar \ + $ c2='cut -f2'; mapred streaming \ -D mapreduce.job.name='Experiment' \ -input /user/me/samples/student_marks \ -output /user/me/samples/student_out \ @@ -525,7 +525,7 @@ $H3 How do I specify multiple input directories? You can specify multiple input directories with multiple '-input' options: - hadoop jar hadoop-streaming-${project.version}.jar \ + mapred streaming \ -input '/user/foo/dir1' -input '/user/foo/dir2' \ (rest of the command) @@ -541,7 +541,7 @@ $H3 How do I parse XML documents using streaming? You can use the record reader StreamXmlRecordReader to process XML documents. - hadoop jar hadoop-streaming-${project.version}.jar \ + mapred streaming \ -inputreader "StreamXmlRecord,begin=BEGIN_STRING,end=END_STRING" \ (rest of the command) diff --git a/hadoop-yarn-project/hadoop-yarn/bin/yarn b/hadoop-yarn-project/hadoop-yarn/bin/yarn index cac3bb6c0c..878c95f662 100755 --- a/hadoop-yarn-project/hadoop-yarn/bin/yarn +++ b/hadoop-yarn-project/hadoop-yarn/bin/yarn @@ -16,7 +16,12 @@ # limitations under the License. MYNAME="${BASH_SOURCE-$0}" +HADOOP_SHELL_EXECNAME="${MYNAME##*/}" +## @description build up the yarn command's usage text. 
## @description  Default command handler for yarn command
## @audience     public
## @stability    stable
## @replaceable  no
## @param        CLI arguments
function yarncmd_case
{
  # scratch variables for the `top` branch, kept local so they do not
  # leak into the calling script
  declare arg
  declare cols
  declare rows
  declare cols_given=false
  declare rows_given=false

  subcmd=$1
  shift

  # Each branch picks the Java class to run (HADOOP_CLASSNAME), appends the
  # matching *_OPTS to HADOOP_OPTS and, for daemons, flags the subcommand
  # as daemonizable.  Branches that rewrite the argument list must publish
  # it through HADOOP_SUBCMD_ARGS: `set --` inside a function only changes
  # the function's own positional parameters and is lost on return.
  # NOTE(review): assumes the yarn launcher hands HADOOP_SUBCMD_ARGS to the
  # JVM the same way bin/mapred does -- confirm against the caller.
  case ${subcmd} in
    application|applicationattempt|container)
      HADOOP_CLASSNAME=org.apache.hadoop.yarn.client.cli.ApplicationCLI
      hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}"
      # ApplicationCLI serves all three names, so the subcommand itself is
      # put back in front of the user's arguments
      set -- "${subcmd}" "$@"
      # shellcheck disable=SC2034
      HADOOP_SUBCMD_ARGS=("$@")
    ;;
    classpath)
      hadoop_do_classpath_subcommand HADOOP_CLASSNAME "$@"
    ;;
    cluster)
      HADOOP_CLASSNAME=org.apache.hadoop.yarn.client.cli.ClusterCLI
      # every other client command appends to HADOOP_OPTS; this branch
      # used YARN_OPTS, which no sibling branch touches
      hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}"
    ;;
    daemonlog)
      HADOOP_CLASSNAME=org.apache.hadoop.log.LogLevel
      hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}"
    ;;
    envvars)
      # print the environment the scripts resolved and stop; no JVM is run
      echo "JAVA_HOME='${JAVA_HOME}'"
      echo "HADOOP_YARN_HOME='${HADOOP_YARN_HOME}'"
      echo "YARN_DIR='${YARN_DIR}'"
      echo "YARN_LIB_JARS_DIR='${YARN_LIB_JARS_DIR}'"
      echo "HADOOP_CONF_DIR='${HADOOP_CONF_DIR}'"
      echo "HADOOP_TOOLS_HOME='${HADOOP_TOOLS_HOME}'"
      echo "HADOOP_TOOLS_DIR='${HADOOP_TOOLS_DIR}'"
      echo "HADOOP_TOOLS_LIB_JARS_DIR='${HADOOP_TOOLS_LIB_JARS_DIR}'"
      exit 0
    ;;
    jar)
      HADOOP_CLASSNAME=org.apache.hadoop.util.RunJar
      hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}"
    ;;
    historyserver)
      # deprecated alias: runs the same class as `timelineserver`
      HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
      echo "DEPRECATED: Use of this command to start the timeline server is deprecated." 1>&2
      echo "Instead use the timelineserver command for it." 1>&2
      echo "Starting the History Server anyway..." 1>&2
      HADOOP_CLASSNAME='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
    ;;
    logs)
      HADOOP_CLASSNAME=org.apache.hadoop.yarn.client.cli.LogsCLI
      hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}"
    ;;
    node)
      HADOOP_CLASSNAME=org.apache.hadoop.yarn.client.cli.NodeCLI
      hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}"
    ;;
    nodemanager)
      HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
      HADOOP_CLASSNAME='org.apache.hadoop.yarn.server.nodemanager.NodeManager'
      hadoop_debug "Append YARN_NODEMANAGER_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${YARN_NODEMANAGER_OPTS}"
      # Backwards compatibility
      if [[ -n "${YARN_NODEMANAGER_HEAPSIZE}" ]]; then
        # shellcheck disable=SC2034
        HADOOP_HEAPSIZE_MAX="${YARN_NODEMANAGER_HEAPSIZE}"
      fi
    ;;
    proxyserver)
      HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
      HADOOP_CLASSNAME='org.apache.hadoop.yarn.server.webproxy.WebAppProxyServer'
      hadoop_debug "Append YARN_PROXYSERVER_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${YARN_PROXYSERVER_OPTS}"
      # Backwards compatibility
      if [[ -n "${YARN_PROXYSERVER_HEAPSIZE}" ]]; then
        # shellcheck disable=SC2034
        HADOOP_HEAPSIZE_MAX="${YARN_PROXYSERVER_HEAPSIZE}"
      fi
    ;;
    queue)
      HADOOP_CLASSNAME=org.apache.hadoop.yarn.client.cli.QueueCLI
      hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}"
    ;;
    resourcemanager)
      HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
      HADOOP_CLASSNAME='org.apache.hadoop.yarn.server.resourcemanager.ResourceManager'
      hadoop_debug "Append YARN_RESOURCEMANAGER_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${YARN_RESOURCEMANAGER_OPTS}"
      # Backwards compatibility
      if [[ -n "${YARN_RESOURCEMANAGER_HEAPSIZE}" ]]; then
        # shellcheck disable=SC2034
        HADOOP_HEAPSIZE_MAX="${YARN_RESOURCEMANAGER_HEAPSIZE}"
      fi
    ;;
    rmadmin)
      HADOOP_CLASSNAME='org.apache.hadoop.yarn.client.cli.RMAdminCLI'
      hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}"
    ;;
    scmadmin)
      HADOOP_CLASSNAME='org.apache.hadoop.yarn.client.SCMAdmin'
      hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}"
    ;;
    sharedcachemanager)
      HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
      HADOOP_CLASSNAME='org.apache.hadoop.yarn.server.sharedcachemanager.SharedCacheManager'
      hadoop_debug "Append YARN_SHAREDCACHEMANAGER_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${YARN_SHAREDCACHEMANAGER_OPTS}"
    ;;
    timelineserver)
      HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
      HADOOP_CLASSNAME='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
      hadoop_debug "Append YARN_TIMELINESERVER_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${YARN_TIMELINESERVER_OPTS}"
      # Backwards compatibility
      if [[ -n "${YARN_TIMELINESERVER_HEAPSIZE}" ]]; then
        # shellcheck disable=SC2034
        HADOOP_HEAPSIZE_MAX="${YARN_TIMELINESERVER_HEAPSIZE}"
      fi
    ;;
    version)
      HADOOP_CLASSNAME=org.apache.hadoop.util.VersionInfo
      hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}"
    ;;
    top)
      # did the user already size the display?
      for arg in "$@"; do
        if [[ "${arg}" == "-cols" ]]; then
          cols_given=true
        fi
        if [[ "${arg}" == "-rows" ]]; then
          rows_given=true
        fi
      done
      # otherwise ask the terminal (only when TERM is set) and append the
      # result; "$@" is kept quoted so arguments containing whitespace or
      # glob characters survive untouched
      if [[ "${cols_given}" == false && -n "${TERM}" ]]; then
        cols=$(tput cols)
        if [[ -n "${cols}" ]]; then
          set -- "$@" "-cols" "${cols}"
        fi
      fi
      if [[ "${rows_given}" == false && -n "${TERM}" ]]; then
        rows=$(tput lines)
        if [[ -n "${rows}" ]]; then
          set -- "$@" "-rows" "${rows}"
        fi
      fi
      # shellcheck disable=SC2034
      HADOOP_SUBCMD_ARGS=("$@")
      HADOOP_CLASSNAME=org.apache.hadoop.yarn.client.cli.TopCLI
      hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS"
      HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}"
    ;;
    *)
      # anything else is treated as a class name to run
      HADOOP_CLASSNAME="${subcmd}"
      if ! hadoop_validate_classname "${HADOOP_CLASSNAME}"; then
        hadoop_exit_with_usage 1
      fi
    ;;
  esac
}
1>&2 - CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer' - ;; - logs) - CLASS=org.apache.hadoop.yarn.client.cli.LogsCLI - hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}" - ;; - node) - CLASS=org.apache.hadoop.yarn.client.cli.NodeCLI - hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}" - ;; - nodemanager) - supportdaemonization="true" - CLASS='org.apache.hadoop.yarn.server.nodemanager.NodeManager' - hadoop_debug "Append YARN_NODEMANAGER_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_NODEMANAGER_OPTS}" - # Backwards compatibility - if [[ -n "${YARN_NODEMANAGER_HEAPSIZE}" ]]; then - HADOOP_HEAPSIZE_MAX="${YARN_NODEMANAGER_HEAPSIZE}" - fi - ;; - proxyserver) - supportdaemonization="true" - CLASS='org.apache.hadoop.yarn.server.webproxy.WebAppProxyServer' - hadoop_debug "Append YARN_PROXYSERVER_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_PROXYSERVER_OPTS}" - # Backwards compatibility - if [[ -n "${YARN_PROXYSERVER_HEAPSIZE}" ]]; then - HADOOP_HEAPSIZE_MAX="${YARN_PROXYSERVER_HEAPSIZE}" - fi - ;; - queue) - CLASS=org.apache.hadoop.yarn.client.cli.QueueCLI - hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}" - ;; - resourcemanager) - supportdaemonization="true" - CLASS='org.apache.hadoop.yarn.server.resourcemanager.ResourceManager' - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_RESOURCEMANAGER_OPTS}" - hadoop_debug "Append YARN_RESOURCEMANAGER_OPTS onto HADOOP_OPTS" - # Backwards compatibility - if [[ -n "${YARN_RESOURCEMANAGER_HEAPSIZE}" ]]; then - HADOOP_HEAPSIZE_MAX="${YARN_RESOURCEMANAGER_HEAPSIZE}" - fi - ;; - rmadmin) - CLASS='org.apache.hadoop.yarn.client.cli.RMAdminCLI' - hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}" - ;; - scmadmin) - 
CLASS='org.apache.hadoop.yarn.client.SCMAdmin' - hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}" - ;; - sharedcachemanager) - supportdaemonization="true" - CLASS='org.apache.hadoop.yarn.server.sharedcachemanager.SharedCacheManager' - hadoop_debug "Append YARN_SHAREDCACHEMANAGER_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_SHAREDCACHEMANAGER_OPTS}" - ;; - timelineserver) - supportdaemonization="true" - CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer' - hadoop_debug "Append YARN_TIMELINESERVER_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_TIMELINESERVER_OPTS}" - # Backwards compatibility - if [[ -n "${YARN_TIMELINESERVER_HEAPSIZE}" ]]; then - HADOOP_HEAPSIZE_MAX="${YARN_TIMELINESERVER_HEAPSIZE}" - fi - ;; - version) - CLASS=org.apache.hadoop.util.VersionInfo - hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}" - ;; - top) - doNotSetCols=0 - doNotSetRows=0 - for i in "$@"; do - if [[ $i == "-cols" ]]; then - doNotSetCols=1 - fi - if [[ $i == "-rows" ]]; then - doNotSetRows=1 - fi - done - if [ $doNotSetCols == 0 ] && [ -n "${TERM}" ]; then - cols=$(tput cols) - if [ -n "$cols" ]; then - args=( $@ ) - args=("${args[@]}" "-cols" "$cols") - set -- "${args[@]}" - fi - fi - if [ $doNotSetRows == 0 ] && [ -n "${TERM}" ]; then - rows=$(tput lines) - if [ -n "$rows" ]; then - args=( $@ ) - args=("${args[@]}" "-rows" "$rows") - set -- "${args[@]}" - fi - fi - CLASS=org.apache.hadoop.yarn.client.cli.TopCLI - hadoop_debug "Append YARN_CLIENT_OPTS onto HADOOP_OPTS" - HADOOP_OPTS="${HADOOP_OPTS} ${YARN_CLIENT_OPTS}" - ;; - *) - CLASS="${COMMAND}" - if ! 
hadoop_validate_classname "${CLASS}"; then - hadoop_exit_with_usage 1 - fi - ;; -esac +HADOOP_SUBCMD_ARGS=("$@") -hadoop_verify_user "${COMMAND}" +if declare -f yarn_subcommand_"${HADOOP_SUBCMD}" >/dev/null 2>&1; then + hadoop_debug "Calling dynamically: yarn_subcommand_${HADOOP_SUBCMD} ${HADOOP_SUBCMD_ARGS[*]}" + "yarn_subcommand_${HADOOP_SUBCMD}" "${HADOOP_SUBCMD_ARGS[@]}" +else + yarncmd_case "${HADOOP_SUBCMD}" "${HADOOP_SUBCMD_ARGS[@]}" +fi + +hadoop_verify_user "${HADOOP_SUBCMD}" if [[ ${HADOOP_SLAVE_MODE} = true ]]; then hadoop_common_slave_mode_execute "${HADOOP_YARN_HOME}/bin/yarn" "${HADOOP_USER_PARAMS[@]}" exit $? fi -daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out" -daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}.pid" +if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then + HADOOP_SECURE_USER="${HADOOP_SUBCMD_SECUREUSER}" + hadoop_verify_secure_prereq + hadoop_setup_secure_service + priv_outfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out" + priv_errfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.err" + priv_pidfile="${HADOOP_PID_DIR}/privileged-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid" + daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out" + daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid" +else + daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.out" + daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}.pid" +fi if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then # shellcheck disable=SC2034 HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}" - HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log" + # shellcheck disable=SC2034 + 
HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${HADOOP_SUBCMD}-${HOSTNAME}.log" fi hadoop_finalize -if [[ -n "${supportdaemonization}" ]]; then - if [[ -n "${secure_service}" ]]; then - hadoop_secure_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" \ - "${CLASS}" "${daemon_pidfile}" "${daemon_outfile}" \ - "${priv_pidfile}" "${priv_outfile}" "${priv_errfile}" "$@" +if [[ "${HADOOP_SUBCMD_SUPPORTDAEMONIZATION}" = true ]]; then + if [[ "${HADOOP_SUBCMD_SECURESERVICE}" = true ]]; then + hadoop_secure_daemon_handler \ + "${HADOOP_DAEMON_MODE}" \ + "${HADOOP_SUBCMD}" \ + "${HADOOP_CLASSNAME}" \ + "${daemon_pidfile}" \ + "${daemon_outfile}" \ + "${priv_pidfile}" \ + "${priv_outfile}" \ + "${priv_errfile}" \ + "${HADOOP_SUBCMD_ARGS[@]}" else - hadoop_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}" \ - "${daemon_pidfile}" "${daemon_outfile}" "$@" + hadoop_daemon_handler \ + "${HADOOP_DAEMON_MODE}" \ + "${HADOOP_SUBCMD}" \ + "${HADOOP_CLASSNAME}" \ + "${daemon_pidfile}" \ + "${daemon_outfile}" \ + "${HADOOP_SUBCMD_ARGS[@]}" fi exit $? else - hadoop_java_exec "${COMMAND}" "${CLASS}" "$@" + # shellcheck disable=SC2086 + hadoop_java_exec "${HADOOP_SUBCMD}" "${HADOOP_CLASSNAME}" "${HADOOP_SUBCMD_ARGS[@]}" fi diff --git a/hadoop-yarn-project/hadoop-yarn/shellprofile.d/yarn.sh b/hadoop-yarn-project/hadoop-yarn/shellprofile.d/hadoop-yarn.sh similarity index 99% rename from hadoop-yarn-project/hadoop-yarn/shellprofile.d/yarn.sh rename to hadoop-yarn-project/hadoop-yarn/shellprofile.d/hadoop-yarn.sh index 4aa20b1fcb..4602293bf1 100644 --- a/hadoop-yarn-project/hadoop-yarn/shellprofile.d/yarn.sh +++ b/hadoop-yarn-project/hadoop-yarn/shellprofile.d/hadoop-yarn.sh @@ -1,4 +1,4 @@ - +#!/usr/bin/env bash # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership.