diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index c4698a7e2e..b385da0103 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -339,6 +339,10 @@ Release 2.0.2-alpha - 2012-09-07 MAPREDUCE-4380. Empty Userlogs directory is getting created under logs directory (Devaraj K via bobby) + MAPREDUCE-4649. Ensure MapReduce JobHistory Daemon doens't assume + HADOOP_YARN_HOME and HADOOP_MAPRED_HOME are the same. (vinodkv via + acmurthy) + Release 2.0.0-alpha - 05-23-2012 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/INSTALL b/hadoop-mapreduce-project/INSTALL index e35c8cabde..fae237045b 100644 --- a/hadoop-mapreduce-project/INSTALL +++ b/hadoop-mapreduce-project/INSTALL @@ -19,7 +19,7 @@ mvn clean package -Pdist -Dtar -DskipTests -Pnative You can omit -Pnative it you don't want to build native packages. Step 4) Untar the tarball from hadoop-dist/target/ into a clean and different -directory, say YARN_HOME. +directory, say HADOOP_YARN_HOME. Step 5) Start hdfs @@ -32,7 +32,7 @@ You probably want to export these in hadoop-env.sh and yarn-env.sh also. export HADOOP_MAPRED_HOME= export HADOOP_COMMON_HOME= export HADOOP_HDFS_HOME= -export YARN_HOME=directory where you untarred yarn +export HADOOP_YARN_HOME=directory where you untarred yarn export HADOOP_CONF_DIR= export YARN_CONF_DIR=$HADOOP_CONF_DIR @@ -53,7 +53,7 @@ Step 8) Modify mapred-site.xml to use yarn framework yarn -Step 9) cd $YARN_HOME +Step 9) cd $HADOOP_YARN_HOME Step 10) sbin/yarn-daemon.sh start resourcemanager @@ -64,7 +64,7 @@ Step 12) sbin/mr-jobhistory-daemon.sh start historyserver Step 13) You are all set, an example on how to run a mapreduce job is: cd $HADOOP_MAPRED_HOME ant examples -Dresolvers=internal -$HADOOP_COMMON_HOME/bin/hadoop jar $HADOOP_MAPRED_HOME/build/hadoop-mapreduce-examples-*.jar randomwriter -Dmapreduce.job.user.name=$USER -Dmapreduce.randomwriter.bytespermap=10000 -Ddfs.blocksize=536870912 -Ddfs.block.size=536870912 -libjars $YARN_HOME/modules/hadoop-mapreduce-client-jobclient-*.jar output +$HADOOP_COMMON_HOME/bin/hadoop jar $HADOOP_MAPRED_HOME/build/hadoop-mapreduce-examples-*.jar randomwriter -Dmapreduce.job.user.name=$USER -Dmapreduce.randomwriter.bytespermap=10000 -Ddfs.blocksize=536870912 -Ddfs.block.size=536870912 -libjars $HADOOP_YARN_HOME/modules/hadoop-mapreduce-client-jobclient-*.jar output The output on the command line should be almost similar to what you see in the JT/TT setup (Hadoop 0.20/0.21) diff --git a/hadoop-mapreduce-project/bin/mapred b/hadoop-mapreduce-project/bin/mapred index deb9cafea6..86560c1211 100755 --- a/hadoop-mapreduce-project/bin/mapred +++ b/hadoop-mapreduce-project/bin/mapred @@ -115,6 +115,11 @@ for f in $HADOOP_MAPRED_HOME/${MAPRED_DIR}/*.jar; do CLASSPATH=${CLASSPATH}:$f; done +# Need YARN jars also +for f in $HADOOP_YARN_HOME/${YARN_DIR}/*.jar; do + CLASSPATH=${CLASSPATH}:$f; +done + # add libs to CLASSPATH for f in $HADOOP_MAPRED_HOME/${MAPRED_LIB_JARS_DIR}/*.jar; do CLASSPATH=${CLASSPATH}:$f; diff --git a/hadoop-mapreduce-project/bin/mapred-config.sh b/hadoop-mapreduce-project/bin/mapred-config.sh index d1eb627adb..c29b7e919b 100644 --- a/hadoop-mapreduce-project/bin/mapred-config.sh +++ b/hadoop-mapreduce-project/bin/mapred-config.sh @@ -38,3 +38,14 @@ else echo "Hadoop common not found." exit fi + +# some more specific variables +export HADOOP_MAPRED_LOG_DIR=${HADOOP_MAPRED_LOG_DIR:-${HADOOP_MAPRED_HOME}/logs} +export HADOOP_MAPRED_LOGFILE=${HADOOP_MAPRED_LOGFILE:-hadoop.log} + +HADOOP_MAPRED_ROOT_LOGGER=${HADOOP_MAPRED_ROOT_LOGGER:-INFO,console} +HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_MAPRED_LOG_DIR" +HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.file=$HADOOP_MAPRED_LOGFILE" +export HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.root.logger=${HADOOP_MAPRED_ROOT_LOGGER}" + + diff --git a/hadoop-mapreduce-project/bin/mr-jobhistory-daemon.sh b/hadoop-mapreduce-project/bin/mr-jobhistory-daemon.sh index ed2eef0eb9..52f1be8ca2 100644 --- a/hadoop-mapreduce-project/bin/mr-jobhistory-daemon.sh +++ b/hadoop-mapreduce-project/bin/mr-jobhistory-daemon.sh @@ -16,22 +16,16 @@ # limitations under the License. -# Runs a yarn command as a daemon. # # Environment Variables # -# HADOOP_LOGFILE Hadoop log file. -# HADOOP_ROOT_LOGGER Hadoop root logger. # HADOOP_JHS_LOGGER Hadoop JobSummary logger. -# YARN_CONF_DIR Alternate conf dir. Default is ${YARN_HOME}/conf. -# YARN_LOG_DIR Where log files are stored. PWD by default. -# YARN_MASTER host:path where hadoop code should be rsync'd from -# YARN_PID_DIR The pid files are stored. /tmp by default. -# YARN_IDENT_STRING A string representing this instance of hadoop. $USER by default -# YARN_NICENESS The scheduling priority for daemons. Defaults to 0. +# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_MAPRED_HOME}/conf. +# HADOOP_MAPRED_PID_DIR The pid files are stored. /tmp by default. +# HADOOP_MAPRED_NICENESS The scheduling priority for daemons. Defaults to 0. ## -usage="Usage: mr-jobhistory-daemon.sh [--config ] [--hosts hostlistfile] (start|stop) " +usage="Usage: mr-jobhistory-daemon.sh [--config ] (start|stop) " # if no args specified, show usage if [ $# -le 1 ]; then @@ -42,10 +36,6 @@ fi bin=`dirname "${BASH_SOURCE-$0}"` bin=`cd "$bin"; pwd` -DEFAULT_LIBEXEC_DIR="$bin"/../libexec -HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} -. $HADOOP_LIBEXEC_DIR/yarn-config.sh - # get arguments startStop=$1 shift @@ -69,43 +59,47 @@ hadoop_rotate_log () fi } -if [ -f "${YARN_CONF_DIR}/yarn-env.sh" ]; then - . "${YARN_CONF_DIR}/yarn-env.sh" +if [ "$HADOOP_MAPRED_IDENT_STRING" = "" ]; then + export HADOOP_MAPRED_IDENT_STRING="$USER" fi -if [ "$YARN_IDENT_STRING" = "" ]; then - export YARN_IDENT_STRING="$USER" -fi - -# get log directory -if [ "$YARN_LOG_DIR" = "" ]; then - export YARN_LOG_DIR="$YARN_HOME/logs" -fi -mkdir -p "$YARN_LOG_DIR" -chown $YARN_IDENT_STRING $YARN_LOG_DIR - -if [ "$YARN_PID_DIR" = "" ]; then - YARN_PID_DIR=/tmp -fi - -# some variables -export HADOOP_LOGFILE=yarn-$YARN_IDENT_STRING-$command-$HOSTNAME.log -export HADOOP_ROOT_LOGGER=${HADOOP_ROOT_LOGGER:-INFO,RFA} +export HADOOP_MAPRED_HOME=${HADOOP_MAPRED_HOME:-${HADOOP_PREFIX}} +export HADOOP_MAPRED_LOGFILE=mapred-$HADOOP_MAPRED_IDENT_STRING-$command-$HOSTNAME.log +export HADOOP_MAPRED_ROOT_LOGGER=${HADOOP_MAPRED_ROOT_LOGGER:-INFO,RFA} export HADOOP_JHS_LOGGER=${HADOOP_JHS_LOGGER:-INFO,JSA} -log=$YARN_LOG_DIR/yarn-$YARN_IDENT_STRING-$command-$HOSTNAME.out -pid=$YARN_PID_DIR/yarn-$YARN_IDENT_STRING-$command.pid -YARN_STOP_TIMEOUT=${YARN_STOP_TIMEOUT:-5} + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +. $HADOOP_LIBEXEC_DIR/mapred-config.sh + +if [ -f "${HADOOP_CONF_DIR}/mapred-env.sh" ]; then + . "${HADOOP_CONF_DIR}/mapred-env.sh" +fi + +mkdir -p "$HADOOP_MAPRED_LOG_DIR" +chown $HADOOP_MAPRED_IDENT_STRING $HADOOP_MAPRED_LOG_DIR + +if [ "$HADOOP_MAPRED_PID_DIR" = "" ]; then + HADOOP_MAPRED_PID_DIR=/tmp +fi + +HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_MAPRED_IDENT_STRING" + +log=$HADOOP_MAPRED_LOG_DIR/mapred-$HADOOP_MAPRED_IDENT_STRING-$command-$HOSTNAME.out +pid=$HADOOP_MAPRED_PID_DIR/mapred-$HADOOP_MAPRED_IDENT_STRING-$command.pid + +HADOOP_MAPRED_STOP_TIMEOUT=${HADOOP_MAPRED_STOP_TIMEOUT:-5} # Set default scheduling priority -if [ "$YARN_NICENESS" = "" ]; then - export YARN_NICENESS=0 +if [ "$HADOOP_MAPRED_NICENESS" = "" ]; then + export HADOOP_MAPRED_NICENESS=0 fi case $startStop in (start) - mkdir -p "$YARN_PID_DIR" + mkdir -p "$HADOOP_MAPRED_PID_DIR" if [ -f $pid ]; then if kill -0 `cat $pid` > /dev/null 2>&1; then @@ -114,15 +108,10 @@ case $startStop in fi fi - if [ "$YARN_MASTER" != "" ]; then - echo rsync from $YARN_MASTER - rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $YARN_MASTER/ "$YARN_HOME" - fi - hadoop_rotate_log $log echo starting $command, logging to $log - cd "$YARN_HOME" - nohup nice -n $YARN_NICENESS "$YARN_HOME"/bin/mapred --config $YARN_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null & + cd "$HADOOP_MAPRED_HOME" + nohup nice -n $HADOOP_MAPRED_NICENESS "$HADOOP_MAPRED_HOME"/bin/mapred --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null & echo $! > $pid sleep 1; head "$log" ;; @@ -134,9 +123,9 @@ case $startStop in if kill -0 $TARGET_PID > /dev/null 2>&1; then echo stopping $command kill $TARGET_PID - sleep $YARN_STOP_TIMEOUT + sleep $HADOOP_MAPRED_STOP_TIMEOUT if kill -0 $TARGET_PID > /dev/null 2>&1; then - echo "$command did not stop gracefully after $YARN_STOP_TIMEOUT seconds: killing with kill -9" + echo "$command did not stop gracefully after $HADOOP_MAPRED_STOP_TIMEOUT seconds: killing with kill -9" kill -9 $TARGET_PID fi else diff --git a/hadoop-mapreduce-project/conf/mapred-env.sh b/hadoop-mapreduce-project/conf/mapred-env.sh new file mode 100644 index 0000000000..6be1e27966 --- /dev/null +++ b/hadoop-mapreduce-project/conf/mapred-env.sh @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# export JAVA_HOME=/home/y/libexec/jdk1.6.0/ + +export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000 + +export HADOOP_MAPRED_ROOT_LOGGER=INFO,RFA + +#export HADOOP_JOB_HISTORYSERVER_OPTS= +#export HADOOP_MAPRED_LOG_DIR="" # Where log files are stored. $HADOOP_MAPRED_HOME/logs by default. +#export HADOOP_JHS_LOGGER=INFO,RFA # Hadoop JobSummary logger. +#export HADOOP_MAPRED_PID_DIR= # The pid files are stored. /tmp by default. +#export HADOOP_MAPRED_IDENT_STRING= #A string representing this instance of hadoop. $USER by default +#export HADOOP_MAPRED_NICENESS= #The scheduling priority for daemons. Defaults to 0. diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/resources/job_1329348432655_0001_conf.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/resources/job_1329348432655_0001_conf.xml index 10a6bea3be..f3ec383f91 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/resources/job_1329348432655_0001_conf.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/resources/job_1329348432655_0001_conf.xml @@ -102,7 +102,7 @@ dfs.permissions.enabledtrue mapreduce.tasktracker.taskcontrollerorg.apache.hadoop.mapred.DefaultTaskController mapreduce.reduce.shuffle.parallelcopies5 -yarn.nodemanager.env-whitelistJAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,YARN_HOME +yarn.nodemanager.env-whitelistJAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,HADOOP_YARN_HOME mapreduce.jobtracker.heartbeats.in.second100 mapreduce.job.maxtaskfailures.per.tracker4 ipc.client.connection.maxidletime10000 @@ -317,8 +317,8 @@ $HADOOP_COMMON_HOME/share/hadoop/common/lib/*, $HADOOP_HDFS_HOME/share/hadoop/hdfs/*, $HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*, - $YARN_HOME/share/hadoop/mapreduce/*, - $YARN_HOME/share/hadoop/mapreduce/lib/* + $HADOOP_YARN_HOME/share/hadoop/mapreduce/*, + $HADOOP_YARN_HOME/share/hadoop/mapreduce/lib/* yarn.nodemanager.log-aggregation.compression-typegz dfs.image.compressfalse