HADOOP-11553. Formalize the shell API (aw)

Allen Wittenauer 2015-03-26 15:09:51 -07:00
parent 61df1b27a7
commit b30ca8ce0e
6 changed files with 765 additions and 57 deletions

dev-support/shelldocs.py Executable file

@@ -0,0 +1,250 @@
#!/usr/bin/python
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from optparse import OptionParser

def docstrip(key, string):
  '''Strip a "## @<key> " annotation prefix and surrounding whitespace.'''
  string = re.sub("^## @%s " % key, "", string)
  string = string.lstrip()
  string = string.rstrip()
  return string

def toc(list):
  '''Build a table of contents, grouped by interface classification.'''
  tocout = []
  header = ()
  for i in list:
    if header != i.getinter():
      header = i.getinter()
      line = "  * %s\n" % (i.headerbuild())
      tocout.append(line)
    line = "    * [%s](#%s)\n" % (i.getname().replace("_", "\_"), i.getname())
    tocout.append(line)
  return tocout

class ShellFunction:
  def __init__(self):
    self.reset()

  def __cmp__(self, other):
    '''Sort by audience, then stability, then replaceability, then name.'''
    if self.audience == other.audience:
      if self.stability == other.stability:
        if self.replaceb == other.replaceb:
          return cmp(self.name, other.name)
        else:
          if self.replaceb == "Yes":
            return -1
          else:
            return 1
      else:
        if self.stability == "Stable":
          return -1
        else:
          return 1
    else:
      if self.audience == "Public":
        return -1
      else:
        return 1

  def reset(self):
    self.name = None
    self.audience = None
    self.stability = None
    self.replaceb = None
    self.returnt = None
    self.desc = None
    self.params = None

  def setname(self, text):
    definition = text.split()
    self.name = definition[1]

  def getname(self):
    if self.name is None:
      return "None"
    else:
      return self.name

  def setaudience(self, text):
    self.audience = docstrip("audience", text)
    self.audience = self.audience.capitalize()

  def getaudience(self):
    if self.audience is None:
      return "None"
    else:
      return self.audience

  def setstability(self, text):
    self.stability = docstrip("stability", text)
    self.stability = self.stability.capitalize()

  def getstability(self):
    if self.stability is None:
      return "None"
    else:
      return self.stability

  def setreplace(self, text):
    self.replaceb = docstrip("replaceable", text)
    self.replaceb = self.replaceb.capitalize()

  def getreplace(self):
    if self.replaceb is None:
      return "None"
    else:
      return self.replaceb

  def getinter(self):
    return ((self.getaudience(), self.getstability(), self.getreplace()))

  def addreturn(self, text):
    if self.returnt is None:
      self.returnt = []
    self.returnt.append(docstrip("return", text))

  def getreturn(self):
    if self.returnt is None:
      return "Nothing"
    else:
      return "\n\n".join(self.returnt)

  def adddesc(self, text):
    if self.desc is None:
      self.desc = []
    self.desc.append(docstrip("description", text))

  def getdesc(self):
    if self.desc is None:
      return "None"
    else:
      return " ".join(self.desc)

  def addparam(self, text):
    if self.params is None:
      self.params = []
    self.params.append(docstrip("param", text))

  def getparams(self):
    if self.params is None:
      return ""
    else:
      return " ".join(self.params)

  def getusage(self):
    line = "%s %s" % (self.name, self.getparams())
    return line

  def headerbuild(self):
    if self.getreplace() == "Yes":
      replacetext = "Replaceable"
    else:
      replacetext = "Not Replaceable"
    line = "%s/%s/%s" % (self.getaudience(), self.getstability(), replacetext)
    return (line)

  def getdocpage(self):
    line = "### `%s`\n\n" \
           "* Synopsis\n\n" \
           "```\n%s\n" \
           "```\n\n" \
           "* Description\n\n" \
           "%s\n\n" \
           "* Returns\n\n" \
           "%s\n\n" \
           "| Classification | Level |\n" \
           "| :--- | :--- |\n" \
           "| Audience | %s |\n" \
           "| Stability | %s |\n" \
           "| Replaceable | %s |\n\n" \
           % (self.getname(),
              self.getusage(),
              self.getdesc(),
              self.getreturn(),
              self.getaudience(),
              self.getstability(),
              self.getreplace())
    return line

  def __str__(self):
    line = "{%s %s %s %s}" \
           % (self.getname(),
              self.getaudience(),
              self.getstability(),
              self.getreplace())
    return line

def main():
  parser = OptionParser(usage="usage: %prog --skipprnorep --output OUTFILE --input INFILE [--input INFILE ...]")
  parser.add_option("-o", "--output", dest="outfile",
                    action="store", type="string",
                    help="file to create", metavar="OUTFILE")
  parser.add_option("-i", "--input", dest="infile",
                    action="append", type="string",
                    help="file to read", metavar="INFILE")
  parser.add_option("--skipprnorep", dest="skipprnorep",
                    action="store_true", help="Skip Private & Not Replaceable")

  (options, args) = parser.parse_args()

  allfuncs = []
  for filename in options.infile:
    with open(filename, "r") as shellcode:
      funcdef = ShellFunction()
      for line in shellcode:
        if line.startswith('## @description'):
          funcdef.adddesc(line)
        elif line.startswith('## @audience'):
          funcdef.setaudience(line)
        elif line.startswith('## @stability'):
          funcdef.setstability(line)
        elif line.startswith('## @replaceable'):
          funcdef.setreplace(line)
        elif line.startswith('## @param'):
          funcdef.addparam(line)
        elif line.startswith('## @return'):
          funcdef.addreturn(line)
        elif line.startswith('function'):
          funcdef.setname(line)
          # skip Private + Not Replaceable functions when asked
          if options.skipprnorep and \
             funcdef.getaudience() == "Private" and \
             funcdef.getreplace() == "No":
            pass
          else:
            allfuncs.append(funcdef)
          funcdef = ShellFunction()

  allfuncs = sorted(allfuncs)

  outfile = open(options.outfile, "w")
  for line in toc(allfuncs):
    outfile.write(line)

  outfile.write("\n------\n\n")

  header = []
  for funcs in allfuncs:
    if header != funcs.getinter():
      header = funcs.getinter()
      line = "## %s\n" % (funcs.headerbuild())
      outfile.write(line)
    outfile.write(funcs.getdocpage())
  outfile.close()

if __name__ == "__main__":
  main()
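For orientation, this is how the generator would be invoked by hand; the `pre-site` Maven execution added to the pom below does the equivalent (the output path here is purely illustrative):

```bash
python dev-support/shelldocs.py --skipprnorep \
  --output /tmp/UnixShellAPI.md \
  --input hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh
```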

hadoop-common-project/hadoop-common/CHANGES.txt

@@ -19,6 +19,8 @@ Trunk (Unreleased)
    HADOOP-11657. Align the output of `hadoop fs -du` to be more Unix-like.
    (aajisaka)

    HADOOP-11553. Formalize the shell API (aw)

  NEW FEATURES

    HADOOP-6590. Add a username check for hadoop sub-commands (John Smith via aw)

hadoop-common-project/hadoop-common/pom.xml

@@ -471,10 +471,10 @@
            <goal>run</goal>
          </goals>
          <configuration>
            <target>
              <copy file="src/main/resources/core-default.xml" todir="src/site/resources"/>
              <copy file="src/main/xsl/configuration.xsl" todir="src/site/resources"/>
            </target>
          </configuration>
        </execution>
      </executions>
@@ -509,6 +509,53 @@
          </excludes>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>exec-maven-plugin</artifactId>
        <executions>
          <execution>
            <id>shelldocs</id>
            <phase>pre-site</phase>
            <goals>
              <goal>exec</goal>
            </goals>
            <configuration>
              <executable>python</executable>
              <workingDirectory>src/site/markdown</workingDirectory>
              <arguments>
                <argument>${basedir}/../../dev-support/shelldocs.py</argument>
                <argument>--skipprnorep</argument>
                <argument>--output</argument>
                <argument>${basedir}/src/site/markdown/UnixShellAPI.md</argument>
                <argument>--input</argument>
                <argument>${basedir}/src/main/bin/hadoop-functions.sh</argument>
              </arguments>
            </configuration>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <artifactId>maven-clean-plugin</artifactId>
        <configuration>
          <filesets>
            <fileset>
              <directory>src/site/markdown</directory>
              <includes>
                <include>UnixShellAPI.md</include>
              </includes>
              <followSymlinks>false</followSymlinks>
            </fileset>
            <fileset>
              <directory>src/site/resources</directory>
              <includes>
                <include>configuration.xsl</include>
                <include>core-default.xml</include>
              </includes>
              <followSymlinks>false</followSymlinks>
            </fileset>
          </filesets>
        </configuration>
      </plugin>
    </plugins>
  </build>

hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh

@@ -14,13 +14,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.
## @description Print a message to stderr
## @audience public
## @stability stable
## @replaceable no
## @param string
function hadoop_error
{
  echo "$*" 1>&2
}
## @description Print a message to stderr if --debug is turned on
## @audience public
## @stability stable
## @replaceable no
## @param string
function hadoop_debug
{
  if [[ -n "${HADOOP_SHELL_SCRIPT_DEBUG}" ]]; then
@@ -28,10 +36,14 @@ function hadoop_debug
  fi
}
## @description Replace `oldvar` with `newvar` if `oldvar` exists.
## @audience public
## @stability stable
## @replaceable yes
## @param oldvar
## @param newvar
function hadoop_deprecate_envvar
{
  local oldvar=$1
  local newvar=$2
  local oldval=${!oldvar}
@@ -50,10 +62,12 @@ function hadoop_deprecate_envvar
  fi
}
## @description Bootstraps the Hadoop shell environment
## @audience private
## @stability evolving
## @replaceable no
function hadoop_bootstrap
{
  # the root of the Hadoop installation
  # See HADOOP-6255 for the expected directory structure layout
@@ -94,14 +108,14 @@ function hadoop_bootstrap
  hadoop_debug "Initial HADOOP_OPTS=${HADOOP_OPTS}"
}
## @description Locate Hadoop's configuration directory
## @audience private
## @stability evolving
## @replaceable no
function hadoop_find_confdir
{
  local conf_dir

  # An attempt at compatibility with some Hadoop 1.x
  # installs.
  if [[ -e "${HADOOP_PREFIX}/conf/hadoop-env.sh" ]]; then
@@ -114,6 +128,11 @@ function hadoop_find_confdir
  hadoop_debug "HADOOP_CONF_DIR=${HADOOP_CONF_DIR}"
}
## @description Validate ${HADOOP_CONF_DIR}
## @audience public
## @stability stable
## @replaceable yes
## @return will exit on failure conditions
function hadoop_verify_confdir
{
  # Check only log4j.properties by default.
@@ -123,10 +142,12 @@ function hadoop_verify_confdir
  fi
}
## @description Import the hadoop-env.sh settings
## @audience private
## @stability evolving
## @replaceable no
function hadoop_exec_hadoopenv
{
  if [[ -z "${HADOOP_ENV_PROCESSED}" ]]; then
    if [[ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]]; then
      export HADOOP_ENV_PROCESSED=true
@@ -135,26 +156,35 @@ function hadoop_exec_hadoopenv
  fi
}
## @description Import the replaced functions
## @audience private
## @stability evolving
## @replaceable no
function hadoop_exec_userfuncs
{
  if [[ -e "${HADOOP_CONF_DIR}/hadoop-user-functions.sh" ]]; then
    . "${HADOOP_CONF_DIR}/hadoop-user-functions.sh"
  fi
}
## @description Read the user's settings. This provides for users to
## @description override and/or append hadoop-env.sh. It is not meant
## @description as a complete system override.
## @audience private
## @stability evolving
## @replaceable yes
function hadoop_exec_hadooprc
{
  if [[ -f "${HOME}/.hadooprc" ]]; then
    hadoop_debug "Applying the user's .hadooprc"
    . "${HOME}/.hadooprc"
  fi
}
## @description Import shellprofile.d content
## @audience private
## @stability evolving
## @replaceable yes
function hadoop_import_shellprofiles
{
  local i
@@ -180,6 +210,10 @@ function hadoop_import_shellprofiles
  done
}
## @description Initialize the registered shell profiles
## @audience private
## @stability evolving
## @replaceable yes
function hadoop_shellprofiles_init
{
  local i
@@ -194,6 +228,10 @@ function hadoop_shellprofiles_init
  done
}
## @description Apply the shell profile classpath additions
## @audience private
## @stability evolving
## @replaceable yes
function hadoop_shellprofiles_classpath
{
  local i
@@ -208,6 +246,10 @@ function hadoop_shellprofiles_classpath
  done
}
## @description Apply the shell profile native library additions
## @audience private
## @stability evolving
## @replaceable yes
function hadoop_shellprofiles_nativelib
{
  local i
@@ -222,6 +264,10 @@ function hadoop_shellprofiles_nativelib
  done
}
## @description Apply the shell profile final configuration
## @audience private
## @stability evolving
## @replaceable yes
function hadoop_shellprofiles_finalize
{
  local i
@@ -236,6 +282,11 @@ function hadoop_shellprofiles_finalize
  done
}
## @description Initialize the Hadoop shell environment, now that
## @description user settings have been imported
## @audience private
## @stability evolving
## @replaceable no
function hadoop_basic_init
{
  # Some of these are also set in hadoop-env.sh.
@@ -290,10 +341,15 @@ function hadoop_basic_init
  HADOOP_SSH_PARALLEL=${HADOOP_SSH_PARALLEL:-10}
}
## @description Set the slave support information to the contents
## @description of `filename`
## @audience public
## @stability stable
## @replaceable no
## @param filename
## @return will exit if file does not exist
function hadoop_populate_slaves_file
{
  local slavesfile=$1
  shift
  if [[ -f "${slavesfile}" ]]; then
@@ -308,10 +364,17 @@ function hadoop_populate_slaves_file()
  fi
}
## @description Rotates the given `file` until `number` of
## @description files exist.
## @audience public
## @stability stable
## @replaceable no
## @param filename
## @param [number]
## @return $? will contain last mv's return value
function hadoop_rotate_log
{
  #
  # Users are likely to replace this one for something
  # that gzips or uses dates or who knows what.
  #
@@ -334,6 +397,13 @@ function hadoop_rotate_log
  fi
}
## @description Via ssh, log into `hostname` and run `command`
## @audience private
## @stability evolving
## @replaceable yes
## @param hostname
## @param command
## @param [...]
function hadoop_actual_ssh
{
  # we are passing this function to xargs
@@ -345,6 +415,13 @@ function hadoop_actual_ssh
  ssh ${HADOOP_SSH_OPTS} ${slave} $"${@// /\\ }" 2>&1 | sed "s/^/$slave: /"
}
## @description Connect to ${HADOOP_SLAVES} or ${HADOOP_SLAVE_NAMES}
## @description and execute command.
## @audience private
## @stability evolving
## @replaceable yes
## @param command
## @param [...]
function hadoop_connect_to_hosts
{
  # shellcheck disable=SC2124
@@ -405,6 +482,11 @@ function hadoop_connect_to_hosts
  fi
}
## @description Utility routine to handle --slaves mode
## @audience private
## @stability evolving
## @replaceable yes
## @param commandarray
function hadoop_common_slave_mode_execute
{
  #
@@ -431,6 +513,14 @@ function hadoop_common_slave_mode_execute
  hadoop_connect_to_hosts -- "${argv[@]}"
}
## @description Verify that a shell command was passed a valid
## @description class name
## @audience public
## @stability stable
## @replaceable yes
## @param classname
## @return 0 = success
## @return 1 = failure w/user message
function hadoop_validate_classname
{
  local class=$1
@@ -445,6 +535,14 @@ function hadoop_validate_classname
  return 0
}
## @description Append the `appendstring` if `checkstring` is not
## @description present in the given `envvar`
## @audience public
## @stability stable
## @replaceable yes
## @param envvar
## @param checkstring
## @param appendstring
function hadoop_add_param
{
  #
@@ -466,21 +564,30 @@ function hadoop_add_param
  fi
}
## @description Register the given `shellprofile` to the Hadoop
## @description shell subsystem
## @audience public
## @stability stable
## @replaceable yes
## @param shellprofile
function hadoop_add_profile
{
  # shellcheck disable=SC2086
  hadoop_add_param HADOOP_SHELL_PROFILES $1 $1
}
## @description Add a file system object (directory, file,
## @description wildcard, ...) to the classpath. Optionally provide
## @description a hint as to where in the classpath it should go.
## @audience public
## @stability stable
## @replaceable yes
## @param object
## @param [before|after]
## @return 0 = success (added or duplicate)
## @return 1 = failure (doesn't exist or some other reason)
function hadoop_add_classpath
{
  # However, with classpath (& JLP), we can do dedupe
  # along with some sanity checking (e.g., missing directories)
  # since we have a better idea of what is legal
@@ -517,15 +624,23 @@ function hadoop_add_classpath
  return 0
}
## @description Add a file system object (directory, file,
## @description wildcard, ...) to the colonpath. Optionally provide
## @description a hint as to where in the colonpath it should go.
## @description Prior to adding, objects are checked for duplication
## @description and checked for existence. Many other functions use
## @description this function as their base implementation
## @description including `hadoop_add_javalibpath` and `hadoop_add_ldlibpath`.
## @audience public
## @stability stable
## @replaceable yes
## @param envvar
## @param object
## @param [before|after]
## @return 0 = success (added or duplicate)
## @return 1 = failure (doesn't exist or some other reason)
function hadoop_add_colonpath
{
  # this is CLASSPATH, JLP, etc but with dedupe but no
  # other checking
  if [[ -d "${2}" ]] && [[ ":${!1}:" != *":$2:"* ]]; then
@@ -548,12 +663,34 @@ function hadoop_add_colonpath
  return 1
}
## @description Add a file system object (directory, file,
## @description wildcard, ...) to the Java JNI path. Optionally
## @description provide a hint as to where in the Java JNI path
## @description it should go.
## @audience public
## @stability stable
## @replaceable yes
## @param object
## @param [before|after]
## @return 0 = success (added or duplicate)
## @return 1 = failure (doesn't exist or some other reason)
function hadoop_add_javalibpath
{
  # specialized function for a common use case
  hadoop_add_colonpath JAVA_LIBRARY_PATH "$1" "$2"
}
## @description Add a file system object (directory, file,
## @description wildcard, ...) to the LD_LIBRARY_PATH. Optionally
## @description provide a hint as to where in the LD_LIBRARY_PATH
## @description it should go.
## @audience public
## @stability stable
## @replaceable yes
## @param object
## @param [before|after]
## @return 0 = success (added or duplicate)
## @return 1 = failure (doesn't exist or some other reason)
function hadoop_add_ldlibpath
{
  # specialized function for a common use case
@@ -563,6 +700,11 @@ function hadoop_add_ldlibpath
  export LD_LIBRARY_PATH
}
## @description Add the common/core Hadoop components to the
## @description environment
## @audience private
## @stability evolving
## @replaceable yes
function hadoop_add_common_to_classpath
{
  #
@@ -582,6 +724,11 @@ function hadoop_add_common_to_classpath
  hadoop_add_classpath "${HADOOP_COMMON_HOME}/${HADOOP_COMMON_DIR}"'/*'
}
## @description Add the user's custom classpath settings to the
## @description environment
## @audience private
## @stability evolving
## @replaceable yes
function hadoop_add_to_classpath_userpath
{
  # Add the user-specified HADOOP_CLASSPATH to the
@@ -619,13 +766,15 @@ function hadoop_add_to_classpath_userpath
  fi
}
## @description Routine to configure any OS-specific settings.
## @audience public
## @stability stable
## @replaceable yes
## @return may exit on failure conditions
function hadoop_os_tricks
{
  local bindv6only

  HADOOP_IS_CYGWIN=false
  case ${HADOOP_OS_TYPE} in
    Darwin)
@@ -664,6 +813,11 @@ function hadoop_os_tricks
  esac
}
## @description Configure/verify ${JAVA_HOME}
## @audience public
## @stability stable
## @replaceable yes
## @return may exit on failure conditions
function hadoop_java_setup
{
  # Bail if we did not detect it
@@ -685,6 +839,10 @@ function hadoop_java_setup
  fi
}
## @description Finish Java JNI paths prior to execution
## @audience private
## @stability evolving
## @replaceable yes
function hadoop_finalize_libpaths
{
  if [[ -n "${JAVA_LIBRARY_PATH}" ]]; then
@@ -695,6 +853,10 @@ function hadoop_finalize_libpaths
  fi
}
## @description Finish Java heap parameters prior to execution
## @audience private
## @stability evolving
## @replaceable yes
function hadoop_finalize_hadoop_heap
{
  if [[ -n "${HADOOP_HEAPSIZE_MAX}" ]]; then
@@ -720,9 +882,15 @@ function hadoop_finalize_hadoop_heap
  fi
}
## @description Converts the contents of the variable name
## @description `varnameref` into the equivalent Windows path.
## @description If the second parameter is true, then `varnameref`
## @description is treated as though it was a path list.
## @audience public
## @stability stable
## @replaceable yes
## @param varnameref
## @param [true]
function hadoop_translate_cygwin_path
{
  if [[ "${HADOOP_IS_CYGWIN}" = "true" ]]; then
@@ -736,9 +904,11 @@ function hadoop_translate_cygwin_path
  fi
}
## @description Finish configuring Hadoop specific system properties
## @description prior to executing Java
## @audience private
## @stability evolving
## @replaceable yes
function hadoop_finalize_hadoop_opts
{
  hadoop_translate_cygwin_path HADOOP_LOG_DIR
@@ -754,6 +924,10 @@ function hadoop_finalize_hadoop_opts
  hadoop_add_param HADOOP_OPTS hadoop.security.logger "-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER}"
}
## @description Finish Java classpath prior to execution
## @audience private
## @stability evolving
## @replaceable yes
function hadoop_finalize_classpath
{
  hadoop_add_classpath "${HADOOP_CONF_DIR}" before
@@ -764,6 +938,10 @@ function hadoop_finalize_classpath
  hadoop_translate_cygwin_path CLASSPATH true
}
## @description Finish Catalina configuration prior to execution
## @audience private
## @stability evolving
## @replaceable yes
function hadoop_finalize_catalina_opts
{
@@ -783,9 +961,14 @@ function hadoop_finalize_catalina_opts
  hadoop_add_param CATALINA_OPTS "${prefix}.ssl.keystore.file" "-D${prefix}.ssl.keystore.file=${HADOOP_CATALINA_SSL_KEYSTORE_FILE}"
}
## @description Finish all the remaining environment settings prior
## @description to executing Java. This is a wrapper that calls
## @description the other `finalize` routines.
## @audience private
## @stability evolving
## @replaceable yes
function hadoop_finalize
{
  hadoop_shellprofiles_finalize
  hadoop_finalize_classpath
@@ -801,10 +984,15 @@ function hadoop_finalize
  hadoop_translate_cygwin_path HADOOP_MAPRED_HOME
}
## @description Print usage information and exit with the passed
## @description `exitcode`
## @audience public
## @stability stable
## @replaceable no
## @param exitcode
## @return This function will always exit.
function hadoop_exit_with_usage
{
  local exitcode=$1
  if [[ -z $exitcode ]]; then
    exitcode=1
@@ -819,6 +1007,12 @@ function hadoop_exit_with_usage
  exit $exitcode
}
## @description Verify that prerequisites have been met prior to
## @description executing a privileged program.
## @audience private
## @stability evolving
## @replaceable yes
## @return This routine may exit.
function hadoop_verify_secure_prereq
{
  # if you are on an OS like Illumos that has functional roles
@@ -834,6 +1028,9 @@ function hadoop_verify_secure_prereq
  fi
}
## @audience private
## @stability evolving
## @replaceable yes
function hadoop_setup_secure_service
{
  # need a more complicated setup? replace me!
@@ -842,6 +1039,9 @@ function hadoop_setup_secure_service
  HADOOP_LOG_DIR=${HADOOP_SECURE_LOG_DIR}
}
## @audience private
## @stability evolving
## @replaceable yes
function hadoop_verify_piddir
{
  if [[ -z "${HADOOP_PID_DIR}" ]]; then
@@ -864,6 +1064,9 @@ function hadoop_verify_piddir
  rm "${HADOOP_PID_DIR}/$$" >/dev/null 2>&1
}
## @audience private
## @stability evolving
## @replaceable yes
function hadoop_verify_logdir
{
  if [[ -z "${HADOOP_LOG_DIR}" ]]; then
@@ -886,7 +1089,14 @@ function hadoop_verify_logdir
  rm "${HADOOP_LOG_DIR}/$$" >/dev/null 2>&1
}
## @description Determine the status of the daemon referenced
## @description by `pidfile`
## @audience public
## @stability stable
## @replaceable yes
## @param pidfile
## @return (mostly) LSB 4.1.0 compatible status
function hadoop_status_daemon
{
  #
  # LSB 4.1.0 compatible status command (1)
@@ -919,6 +1129,14 @@ function hadoop_status_daemon()
  return 3
}
## @description Execute the Java `class`, passing along any `options`.
## @description Additionally, set the Java property -Dproc_`command`.
## @audience public
## @stability stable
## @replaceable yes
## @param command
## @param class
## @param [options]
function hadoop_java_exec
{
  # run a java command. this is used for
@@ -936,6 +1154,14 @@ function hadoop_java_exec
  exec "${JAVA}" "-Dproc_${command}" ${HADOOP_OPTS} "${class}" "$@"
}
## @description Start a non-privileged daemon in the foreground.
## @audience private
## @stability evolving
## @replaceable yes
## @param command
## @param class
## @param pidfile
## @param [options]
function hadoop_start_daemon
{
  # this is our non-privileged daemon starter
@@ -961,10 +1187,17 @@ function hadoop_start_daemon
  exec "${JAVA}" "-Dproc_${command}" ${HADOOP_OPTS} "${class}" "$@"
}
## @description Start a non-privileged daemon in the background.
## @audience private
## @stability evolving
## @replaceable yes
## @param command
## @param class
## @param pidfile
## @param outfile
## @param [options]
function hadoop_start_daemon_wrapper
{
  local daemonname=$1
  local class=$2
  local pidfile=$3
@@ -1019,6 +1252,17 @@ function hadoop_start_daemon_wrapper
  return 0
}
## @description Start a privileged daemon in the foreground.
## @audience private
## @stability evolving
## @replaceable yes
## @param command
## @param class
## @param daemonpidfile
## @param daemonoutfile
## @param daemonerrfile
## @param wrapperpidfile
## @param [options]
function hadoop_start_secure_daemon
{
  # this is used to launch a secure daemon in the *foreground*
@@ -1075,6 +1319,18 @@ function hadoop_start_secure_daemon
    "${class}" "$@"
}
## @description Start a privileged daemon in the background.
## @audience private
## @stability evolving
## @replaceable yes
## @param command
## @param class
## @param daemonpidfile
## @param daemonoutfile
## @param wrapperpidfile
## @param wrapperoutfile
## @param daemonerrfile
## @param [options]
function hadoop_start_secure_daemon_wrapper
{
  # this wraps hadoop_start_secure_daemon to take care
@@ -1155,6 +1411,13 @@ function hadoop_start_secure_daemon_wrapper
  return 0
}
## @description Stop the non-privileged `command` daemon that is
## @description running at `pidfile`.
## @audience public
## @stability stable
## @replaceable yes
## @param command
## @param pidfile
function hadoop_stop_daemon
{
  local cmd=$1
@@ -1180,6 +1443,15 @@ function hadoop_stop_daemon
  fi
}
## @description Stop the privileged `command` daemon that is
## @description running at `daemonpidfile` and was launched with
## @description the wrapper at `wrapperpidfile`.
## @audience public
## @stability stable
## @replaceable yes
## @param command
## @param daemonpidfile
## @param wrapperpidfile
function hadoop_stop_secure_daemon
{
  local command=$1
@@ -1194,6 +1466,16 @@ function hadoop_stop_secure_daemon
  return ${ret}
}
## @description Manage a non-privileged daemon.
## @audience private
## @stability evolving
## @replaceable yes
## @param [start|stop|status|default]
## @param command
## @param class
## @param daemonpidfile
## @param daemonoutfile
## @param [options]
function hadoop_daemon_handler
{
  local daemonmode=$1
@@ -1238,6 +1520,19 @@ function hadoop_daemon_handler
  esac
}
## @description Manage a privileged daemon.
## @audience private
## @stability evolving
## @replaceable yes
## @param [start|stop|status|default]
## @param command
## @param class
## @param daemonpidfile
## @param daemonoutfile
## @param wrapperpidfile
## @param wrapperoutfile
## @param wrappererrfile
## @param [options]
function hadoop_secure_daemon_handler
{
  local daemonmode=$1
@@ -1290,6 +1585,13 @@ function hadoop_secure_daemon_handler
  esac
}
## @description Verify that ${USER} is allowed to execute the
## @description given subcommand.
## @audience public
## @stability stable
## @replaceable yes
## @param subcommand
## @return will exit on failure conditions
function hadoop_verify_user
{
  local command=$1
@@ -1303,6 +1605,13 @@ function hadoop_verify_user
  fi
}
## @description Perform the 'hadoop classpath', etc., subcommand with the given
## @description parameters
## @audience private
## @stability evolving
## @replaceable yes
## @param [parameters]
## @return will print & exit with no params
function hadoop_do_classpath_subcommand
{
  if [[ "$#" -gt 1 ]]; then

hadoop-common-project/hadoop-common/src/site/markdown/UnixShellGuide.md

@@ -0,0 +1,98 @@
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
# Unix Shell Guide
Much of Hadoop's functionality is controlled via [the shell](CommandsManual.html). There are several ways to modify the default behavior of how these commands execute.
## Important End-User Environment Variables
Hadoop has many environment variables that control various aspects of the software. (See `hadoop-env.sh` and related files.) Some of these environment variables are dedicated to helping end users manage their runtime.
### `HADOOP_CLIENT_OPTS`
This environment variable is used for almost all end-user operations. It can be used to set any Java options as well as any Hadoop options via a system property definition. For example:
```bash
HADOOP_CLIENT_OPTS="-Xmx1g -Dhadoop.socks.server=localhost:4000" hadoop fs -ls /tmp
```
will increase the memory and send this command via a SOCKS proxy server.
### `HADOOP_USER_CLASSPATH`
The Hadoop scripts have the capability to inject more content into the classpath of the running command by setting this environment variable. It should be a colon-delimited list of directories, files, or wildcard locations.
```bash
HADOOP_USER_CLASSPATH=${HOME}/lib/myjars/*.jar hadoop classpath
```
A user can provide hints about the location of the paths via the `HADOOP_USER_CLASSPATH_FIRST` variable. Setting this to any value will tell the system to try to push these paths near the front.
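For example, the following sketch asks for the user's jars to be pushed toward the front of the classpath:

```bash
HADOOP_USER_CLASSPATH_FIRST=yes \
HADOOP_USER_CLASSPATH=${HOME}/lib/myjars/*.jar \
hadoop classpath
```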
### Auto-setting of Variables
If a user has a common set of settings, they can be put into the `${HOME}/.hadooprc` file. This file is always read to initialize and override any variables that the user may want to customize. It uses bash syntax, similar to the `.bashrc` file. For example:
```bash
#
# my custom Hadoop settings!
#
HADOOP_USER_CLASSPATH=${HOME}/hadoopjars/*
HADOOP_USER_CLASSPATH_FIRST=yes
HADOOP_CLIENT_OPTS="-Xmx1g"
```
The `.hadooprc` file can also be used to extend functionality and teach Hadoop new tricks. For example, to run hadoop commands accessing the server referenced in the environment variable `${HADOOP_SERVER}`, the following in the `.hadooprc` will do just that:
```bash
if [[ -n ${HADOOP_SERVER} ]]; then
HADOOP_CONF_DIR=/etc/hadoop.${HADOOP_SERVER}
fi
```
## Administrator Environment
There are many environment variables that impact how the system operates. By far, the most important are the series of `_OPTS` variables that control how daemons work. These variables should contain all of the relevant settings for those daemons.
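As a sketch of the convention (the variable name follows the usual `hadoop-env.sh` pattern and the values are purely illustrative), a NameNode's settings might be collected like this:

```bash
# hadoop-env.sh: everything the NameNode daemon needs, in one variable
export HADOOP_NAMENODE_OPTS="-Xms4g -Xmx4g -Dhadoop.security.logger=INFO,RFAS"
```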
More detailed information is contained in `hadoop-env.sh` and the other env.sh files.
Advanced administrators may wish to supplement or apply platform-specific fixes to the existing scripts. In some systems, this means copying the errant script or creating a custom build with these changes. Hadoop provides the capability to override functions so that the existing code base may be changed in place without all of that work. Replacing functions is covered later under the Shell API documentation.
## Developer and Advanced Administrator Environment
### Shell Profiles
Apache Hadoop allows for third parties to easily add new features through a variety of pluggable interfaces. This includes a shell code subsystem that makes it easy to inject the necessary content into the base installation.
Core to this functionality is the concept of a shell profile. Shell profiles are shell snippets that can do things such as add jars to the classpath, configure Java system properties and more.
Shell profiles may be installed in either `${HADOOP_CONF_DIR}/shellprofile.d` or `${HADOOP_PREFIX}/libexec/shellprofile.d`. Shell profiles in the `libexec` directory are part of the base installation and cannot be overridden by the user. Shell profiles in the configuration directory may be ignored if the end user changes the configuration directory at runtime.
An example of a shell profile is in the libexec directory.
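As a sketch of the shape such a profile takes, assuming the `_<profile>_hadoop_*` callback naming convention dispatched by the `hadoop_shellprofiles_*` functions (the profile name and path here are hypothetical):

```bash
# shellprofile.d/example.sh (hypothetical)
hadoop_add_profile example

function _example_hadoop_classpath
{
  # add this feature's jars to the classpath at startup
  hadoop_add_classpath "${HADOOP_PREFIX}/share/example/*"
}
```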
## Shell API
Hadoop's shell code has a [function library](./UnixShellAPI.html) that administrators and developers may use to assist with configuration and advanced feature management. These APIs follow the standard [Hadoop Interface Classification](./InterfaceClassification.html), with one addition: Replaceable.
The shell code allows for core functions to be overridden. However, not all functions can be, or are safe to be, replaced. If a function is not safe to replace, it will have an attribute of Replaceable: No. If a function is safe to replace, it will have the attribute of Replaceable: Yes.
In order to replace a function, create a file called `hadoop-user-functions.sh` in the `${HADOOP_CONF_DIR}` directory. Simply define the new replacement function in this file and the system will pick it up automatically. There may be as many replacement functions as needed in this file. Examples of function replacement are in the `hadoop-user-functions.sh.examples` file.
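For instance, `hadoop_rotate_log` is marked replaceable, and its own comments anticipate users swapping in gzip- or date-based rotation. A minimal `hadoop-user-functions.sh` doing exactly that might look like the following sketch:

```bash
# ${HADOOP_CONF_DIR}/hadoop-user-functions.sh (sketch)
function hadoop_rotate_log
{
  local log=$1

  if [[ -f "${log}" ]]; then
    local stamp
    stamp=$(date +%Y%m%d%H%M%S)
    # keep timestamped, compressed copies instead of numbered ones
    mv "${log}" "${log}.${stamp}"
    gzip "${log}.${stamp}"
  fi
}
```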
Functions that are marked Public and Stable are safe to use in shell profiles as-is. Other functions may change in a minor release.

hadoop-project/src/site/site.xml

@@ -68,6 +68,7 @@
      <item name="HTTP Authentication" href="hadoop-project-dist/hadoop-common/HttpAuthentication.html"/>
      <item name="Hadoop KMS" href="hadoop-kms/index.html"/>
      <item name="Tracing" href="hadoop-project-dist/hadoop-common/Tracing.html"/>
<item name="Unix Shell Guide" href="hadoop-project-dist/hadoop-common/UnixShellGuide.html"/>
    </menu>

    <menu name="HDFS" inherit="top">
@@ -159,7 +160,8 @@
    <menu name="Reference" inherit="top">
      <item name="Release Notes" href="hadoop-project-dist/hadoop-common/releasenotes.html"/>
      <item name="Java API docs" href="api/index.html"/>
<item name="Unix Shell API" href="hadoop-project-dist/hadoop-common/UnixShellAPI.html"/>
<item name="Common CHANGES.txt" href="hadoop-project-dist/hadoop-common/CHANGES.txt"/> <item name="Common CHANGES.txt" href="hadoop-project-dist/hadoop-common/CHANGES.txt"/>
<item name="HDFS CHANGES.txt" href="hadoop-project-dist/hadoop-hdfs/CHANGES.txt"/> <item name="HDFS CHANGES.txt" href="hadoop-project-dist/hadoop-hdfs/CHANGES.txt"/>
<item name="MapReduce CHANGES.txt" href="hadoop-project-dist/hadoop-mapreduce/CHANGES.txt"/> <item name="MapReduce CHANGES.txt" href="hadoop-project-dist/hadoop-mapreduce/CHANGES.txt"/>