diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml index 281ce0ddcd..eed27a8857 100644 --- a/hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml +++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml @@ -71,6 +71,14 @@ 0755 + + bin + sbin + + mr-jobhistory-daemon.sh + + 0755 + hadoop-yarn/conf etc/hadoop diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 587fe5e953..f74b1f96ed 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -721,6 +721,9 @@ Release 0.23.1 - Unreleased MAPREDUCE-3808. Fixed an NPE in FileOutputCommitter for jobs with maps but no reduces. (Robert Joseph Evans via vinodkv) + MAPREDUCE-3354. Changed scripts so that jobhistory server is started by + bin/mapred instead of bin/yarn. (Jonathan Eagles via acmurthy) + Release 0.23.0 - 2011-11-01 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/INSTALL b/hadoop-mapreduce-project/INSTALL index e75b2aff2f..cac914bad7 100644 --- a/hadoop-mapreduce-project/INSTALL +++ b/hadoop-mapreduce-project/INSTALL @@ -59,12 +59,12 @@ Step 10) sbin/yarn-daemon.sh start resourcemanager Step 11) sbin/yarn-daemon.sh start nodemanager -Step 12) sbin/yarn-daemon.sh start historyserver +Step 12) sbin/mr-jobhistory-daemon.sh start historyserver Step 13) You are all set, an example on how to run a mapreduce job is: cd $HADOOP_MAPRED_HOME ant examples -Dresolvers=internal -$HADOOP_COMMON_HOME/bin/hadoop jar $HADOOP_MAPRED_HOME/build/hadoop-mapreduce-examples-0.24.0-SNAPSHOT.jar randomwriter -Dmapreduce.job.user.name=$USER -Dmapreduce.clientfactory.class.name=org.apache.hadoop.mapred.YarnClientFactory -Dmapreduce.randomwriter.bytespermap=10000 -Ddfs.blocksize=536870912 -Ddfs.block.size=536870912 -libjars $YARN_HOME/modules/hadoop-mapreduce-client-jobclient-0.24.0-SNAPSHOT.jar 
output +$HADOOP_COMMON_HOME/bin/hadoop jar $HADOOP_MAPRED_HOME/build/hadoop-mapreduce-examples-*.jar randomwriter -Dmapreduce.job.user.name=$USER -Dmapreduce.clientfactory.class.name=org.apache.hadoop.mapred.YarnClientFactory -Dmapreduce.randomwriter.bytespermap=10000 -Ddfs.blocksize=536870912 -Ddfs.block.size=536870912 -libjars $YARN_HOME/modules/hadoop-mapreduce-client-jobclient-*.jar output The output on the command line should be almost similar to what you see in the JT/TT setup (Hadoop 0.20/0.21) diff --git a/hadoop-mapreduce-project/bin/mapred b/hadoop-mapreduce-project/bin/mapred index ff1ebbc67d..5ade3dabf4 100755 --- a/hadoop-mapreduce-project/bin/mapred +++ b/hadoop-mapreduce-project/bin/mapred @@ -36,6 +36,7 @@ function print_usage(){ echo " classpath prints the class path needed for running" echo " mapreduce subcommands" echo " groups get the groups which users belong to" + echo " historyserver run job history servers as a standalone daemon" echo "" echo "Most commands print help when invoked w/o parameters." 
} @@ -48,6 +49,8 @@ fi COMMAND=$1 shift +HADOOP_JOB_HISTORYSERVER_OPTS="-Dmapred.jobsummary.logger=${HADOOP_JHS_LOGGER:-INFO,console}" + if [ "$COMMAND" = "job" ] ; then CLASS=org.apache.hadoop.mapred.JobClient elif [ "$COMMAND" = "queue" ] ; then @@ -63,6 +66,9 @@ elif [ "$COMMAND" = "classpath" ] ; then elif [ "$COMMAND" = "groups" ] ; then CLASS=org.apache.hadoop.mapred.tools.GetGroups HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" +elif [ "$COMMAND" = "historyserver" ] ; then + CLASS=org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer + HADOOP_OPTS="$HADOOP_OPTS $HADOOP_JOB_HISTORYSERVER_OPTS" elif [ "$COMMAND" = "mradmin" ] \ || [ "$COMMAND" = "jobtracker" ] \ || [ "$COMMAND" = "tasktracker" ] ; then @@ -103,6 +109,11 @@ for f in $HADOOP_MAPRED_HOME/${MAPRED_LIB_JARS_DIR}/*.jar; do CLASSPATH=${CLASSPATH}:$f; done +# add modules to CLASSPATH +for f in $HADOOP_MAPRED_HOME/modules/*.jar; do + CLASSPATH=${CLASSPATH}:$f; +done + if $cygwin; then CLASSPATH=`cygpath -p -w "$CLASSPATH"` fi @@ -112,12 +123,7 @@ if [ "$COMMAND" = "classpath" ] ; then exit fi -#turn security logger on the jobtracker -if [ $COMMAND = "jobtracker" ]; then - HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,DRFAS}" -else - HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}" -fi +HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}" export CLASSPATH exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@" diff --git a/hadoop-mapreduce-project/bin/mr-jobhistory-daemon.sh b/hadoop-mapreduce-project/bin/mr-jobhistory-daemon.sh new file mode 100644 index 0000000000..6fc3ee7e86 --- /dev/null +++ b/hadoop-mapreduce-project/bin/mr-jobhistory-daemon.sh @@ -0,0 +1,144 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Runs a mapred command as a daemon. +# +# Environment Variables +# +# YARN_CONF_DIR Alternate conf dir. Default is ${YARN_HOME}/conf. +# YARN_LOG_DIR Where log files are stored. ${YARN_HOME}/logs by default. +# YARN_MASTER host:path where hadoop code should be rsync'd from +# YARN_PID_DIR Where the pid files are stored. /tmp by default. +# YARN_IDENT_STRING A string representing this instance of hadoop. $USER by default +# YARN_NICENESS The scheduling priority for daemons. Defaults to 0. +## + +usage="Usage: mr-jobhistory-daemon.sh [--config ] [--hosts hostlistfile] (start|stop) " + +# if no args specified, show usage +if [ $# -le 1 ]; then + echo $usage + exit 1 +fi + +bin=`dirname "${BASH_SOURCE-$0}"` +bin=`cd "$bin"; pwd` + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +. $HADOOP_LIBEXEC_DIR/yarn-config.sh + +# get arguments +startStop=$1 +shift +command=$1 +shift + +hadoop_rotate_log () +{ + log=$1; + num=5; + if [ -n "$2" ]; then + num=$2 + fi + if [ -f "$log" ]; then # rotate logs + while [ $num -gt 1 ]; do + prev=`expr $num - 1` + [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num" + num=$prev + done + mv "$log" "$log.$num"; + fi +} + +if [ -f "${YARN_CONF_DIR}/yarn-env.sh" ]; then + . 
"${YARN_CONF_DIR}/yarn-env.sh" +fi + +if [ "$YARN_IDENT_STRING" = "" ]; then + export YARN_IDENT_STRING="$USER" +fi + +# get log directory +if [ "$YARN_LOG_DIR" = "" ]; then + export YARN_LOG_DIR="$YARN_HOME/logs" +fi +mkdir -p "$YARN_LOG_DIR" +chown $YARN_IDENT_STRING $YARN_LOG_DIR + +if [ "$YARN_PID_DIR" = "" ]; then + YARN_PID_DIR=/tmp +fi + +# some variables +export YARN_LOGFILE=yarn-$YARN_IDENT_STRING-$command-$HOSTNAME.log +export YARN_ROOT_LOGGER=${YARN_ROOT_LOGGER:-INFO,DRFA} +log=$YARN_LOG_DIR/yarn-$YARN_IDENT_STRING-$command-$HOSTNAME.out +pid=$YARN_PID_DIR/yarn-$YARN_IDENT_STRING-$command.pid + +# Set default scheduling priority +if [ "$YARN_NICENESS" = "" ]; then + export YARN_NICENESS=0 +fi + +case $startStop in + + (start) + + mkdir -p "$YARN_PID_DIR" + + if [ -f $pid ]; then + if kill -0 `cat $pid` > /dev/null 2>&1; then + echo $command running as process `cat $pid`. Stop it first. + exit 1 + fi + fi + + if [ "$YARN_MASTER" != "" ]; then + echo rsync from $YARN_MASTER + rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $YARN_MASTER/ "$YARN_HOME" + fi + + hadoop_rotate_log $log + echo starting $command, logging to $log + cd "$YARN_HOME" + nohup nice -n $YARN_NICENESS "$YARN_HOME"/bin/mapred --config $YARN_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null & + echo $! 
> $pid + sleep 1; head "$log" + ;; + + (stop) + + if [ -f $pid ]; then + if kill -0 `cat $pid` > /dev/null 2>&1; then + echo stopping $command + kill `cat $pid` + else + echo no $command to stop + fi + else + echo no $command to stop + fi + ;; + + (*) + echo $usage + exit 1 + ;; + +esac diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/start-yarn.sh b/hadoop-mapreduce-project/hadoop-yarn/bin/start-yarn.sh index ccd63a4478..40b77fb35a 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/start-yarn.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/start-yarn.sh @@ -31,7 +31,5 @@ HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} "$bin"/yarn-daemon.sh --config $YARN_CONF_DIR start resourcemanager # start nodeManager "$bin"/yarn-daemons.sh --config $YARN_CONF_DIR start nodemanager -# start historyserver -#"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR start historyserver # start proxyserver #"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR start proxyserver diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/stop-yarn.sh b/hadoop-mapreduce-project/hadoop-yarn/bin/stop-yarn.sh index c10d1ce7d1..a8498ef3ff 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/stop-yarn.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/stop-yarn.sh @@ -31,7 +31,5 @@ HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} "$bin"/yarn-daemon.sh --config $YARN_CONF_DIR stop resourcemanager # stop nodeManager "$bin"/yarn-daemons.sh --config $YARN_CONF_DIR stop nodemanager -# stop historyServer -"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR stop historyserver # stop proxy server "$bin"/yarn-daemon.sh --config $YARN_CONF_DIR stop proxyserver diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn index 7ceac4feae..d7dae8b8d8 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn @@ -59,7 +59,6 @@ if [ $# = 0 ]; then echo "where COMMAND is one of:" echo " 
resourcemanager run the ResourceManager" echo " nodemanager run a nodemanager on each slave" - echo " historyserver run job history servers as a standalone daemon" echo " rmadmin admin tools" echo " version print the version" echo " jar run a jar file" @@ -154,8 +153,6 @@ if [ "$YARN_LOGFILE" = "" ]; then YARN_LOGFILE='yarn.log' fi -YARN_JOB_HISTORYSERVER_OPTS="-Dmapred.jobsummary.logger=${YARN_JHS_LOGGER:-INFO,console}" - # restore ordinary behaviour unset IFS @@ -181,9 +178,6 @@ elif [ "$COMMAND" = "nodemanager" ] ; then else YARN_OPTS="$YARN_OPTS -server $YARN_NODEMANAGER_OPTS" fi -elif [ "$COMMAND" = "historyserver" ] ; then - CLASS=org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer - YARN_OPTS="$YARN_OPTS $YARN_JOB_HISTORYSERVER_OPTS" elif [ "$COMMAND" = "proxyserver" ] ; then CLASS='org.apache.hadoop.yarn.server.webproxy.WebAppProxyServer' YARN_OPTS="$YARN_OPTS $YARN_PROXYSERVER_OPTS" diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemon.sh b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemon.sh index 1fa43d8b1b..c36e99cac7 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemon.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemon.sh @@ -91,7 +91,6 @@ fi # some variables export YARN_LOGFILE=yarn-$YARN_IDENT_STRING-$command-$HOSTNAME.log export YARN_ROOT_LOGGER=${YARN_ROOT_LOGGER:-INFO,DRFA} -export YARN_JHS_LOGGER=${YARN_JHS_LOGGER:-INFO,JSA} log=$YARN_LOG_DIR/yarn-$YARN_IDENT_STRING-$command-$HOSTNAME.out pid=$YARN_PID_DIR/yarn-$YARN_IDENT_STRING-$command.pid diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ClusterSetup.apt.vm b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ClusterSetup.apt.vm index 0290c23b8a..67db4b13ae 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ClusterSetup.apt.vm +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ClusterSetup.apt.vm @@ -476,7 +476,7 @@ Hadoop MapReduce Next Generation - Cluster 
Setup designated server: ---- - $ $YARN_HOME/bin/yarn start historyserver --config $HADOOP_CONF_DIR + $ $YARN_HOME/sbin/mr-jobhistory-daemon.sh --config $YARN_CONF_DIR start historyserver ---- * Hadoop Shutdown @@ -519,7 +519,7 @@ Hadoop MapReduce Next Generation - Cluster Setup designated server: ---- - $ $YARN_HOME/bin/yarn stop historyserver --config $HADOOP_CONF_DIR + $ $YARN_HOME/sbin/mr-jobhistory-daemon.sh --config $YARN_CONF_DIR stop historyserver ---- @@ -1020,7 +1020,7 @@ KVNO Timestamp Principal designated server as : ---- -[mapred]$ $YARN_HOME/bin/yarn start historyserver --config $HADOOP_CONF_DIR +[mapred]$ $YARN_HOME/sbin/mr-jobhistory-daemon.sh --config $YARN_CONF_DIR start historyserver ---- * Hadoop Shutdown @@ -1063,7 +1063,7 @@ KVNO Timestamp Principal designated server as : ---- -[mapred]$ $YARN_HOME/bin/yarn stop historyserver --config $HADOOP_CONF_DIR +[mapred]$ $YARN_HOME/sbin/mr-jobhistory-daemon.sh --config $YARN_CONF_DIR stop historyserver ---- * {Web Interfaces}