MAPREDUCE-4649. Ensure MapReduce JobHistory Daemon doens't assume HADOOP_YARN_HOME and HADOOP_MAPRED_HOME are the same. Contributed by Vinod K V.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1390224 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Arun Murthy 2012-09-25 23:49:09 +00:00
parent 296ee4575e
commit 1eedee177e
7 changed files with 92 additions and 56 deletions

View File

@ -339,6 +339,10 @@ Release 2.0.2-alpha - 2012-09-07
MAPREDUCE-4380. Empty Userlogs directory is getting created under logs
directory (Devaraj K via bobby)
MAPREDUCE-4649. Ensure MapReduce JobHistory Daemon doens't assume
HADOOP_YARN_HOME and HADOOP_MAPRED_HOME are the same. (vinodkv via
acmurthy)
Release 2.0.0-alpha - 05-23-2012
INCOMPATIBLE CHANGES

View File

@ -19,7 +19,7 @@ mvn clean package -Pdist -Dtar -DskipTests -Pnative
You can omit -Pnative it you don't want to build native packages.
Step 4) Untar the tarball from hadoop-dist/target/ into a clean and different
directory, say YARN_HOME.
directory, say HADOOP_YARN_HOME.
Step 5)
Start hdfs
@ -32,7 +32,7 @@ You probably want to export these in hadoop-env.sh and yarn-env.sh also.
export HADOOP_MAPRED_HOME=<mapred loc>
export HADOOP_COMMON_HOME=<common loc>
export HADOOP_HDFS_HOME=<hdfs loc>
export YARN_HOME=directory where you untarred yarn
export HADOOP_YARN_HOME=directory where you untarred yarn
export HADOOP_CONF_DIR=<conf loc>
export YARN_CONF_DIR=$HADOOP_CONF_DIR
@ -53,7 +53,7 @@ Step 8) Modify mapred-site.xml to use yarn framework
<value>yarn</value>
</property>
Step 9) cd $YARN_HOME
Step 9) cd $HADOOP_YARN_HOME
Step 10) sbin/yarn-daemon.sh start resourcemanager
@ -64,7 +64,7 @@ Step 12) sbin/mr-jobhistory-daemon.sh start historyserver
Step 13) You are all set, an example on how to run a mapreduce job is:
cd $HADOOP_MAPRED_HOME
ant examples -Dresolvers=internal
$HADOOP_COMMON_HOME/bin/hadoop jar $HADOOP_MAPRED_HOME/build/hadoop-mapreduce-examples-*.jar randomwriter -Dmapreduce.job.user.name=$USER -Dmapreduce.randomwriter.bytespermap=10000 -Ddfs.blocksize=536870912 -Ddfs.block.size=536870912 -libjars $YARN_HOME/modules/hadoop-mapreduce-client-jobclient-*.jar output
$HADOOP_COMMON_HOME/bin/hadoop jar $HADOOP_MAPRED_HOME/build/hadoop-mapreduce-examples-*.jar randomwriter -Dmapreduce.job.user.name=$USER -Dmapreduce.randomwriter.bytespermap=10000 -Ddfs.blocksize=536870912 -Ddfs.block.size=536870912 -libjars $HADOOP_YARN_HOME/modules/hadoop-mapreduce-client-jobclient-*.jar output
The output on the command line should be almost similar to what you see in the JT/TT setup (Hadoop 0.20/0.21)

View File

@ -115,6 +115,11 @@ for f in $HADOOP_MAPRED_HOME/${MAPRED_DIR}/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
# Need YARN jars also
for f in $HADOOP_YARN_HOME/${YARN_DIR}/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
# add libs to CLASSPATH
for f in $HADOOP_MAPRED_HOME/${MAPRED_LIB_JARS_DIR}/*.jar; do
CLASSPATH=${CLASSPATH}:$f;

View File

@ -38,3 +38,14 @@ else
echo "Hadoop common not found."
exit
fi
# some more specific variables
export HADOOP_MAPRED_LOG_DIR=${HADOOP_MAPRED_LOG_DIR:-${HADOOP_MAPRED_HOME}/logs}
export HADOOP_MAPRED_LOGFILE=${HADOOP_MAPRED_LOGFILE:-hadoop.log}
HADOOP_MAPRED_ROOT_LOGGER=${HADOOP_MAPRED_ROOT_LOGGER:-INFO,console}
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_MAPRED_LOG_DIR"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.file=$HADOOP_MAPRED_LOGFILE"
export HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.root.logger=${HADOOP_MAPRED_ROOT_LOGGER}"

View File

@ -16,22 +16,16 @@
# limitations under the License.
# Runs a yarn command as a daemon.
#
# Environment Variables
#
# HADOOP_LOGFILE Hadoop log file.
# HADOOP_ROOT_LOGGER Hadoop root logger.
# HADOOP_JHS_LOGGER Hadoop JobSummary logger.
# YARN_CONF_DIR Alternate conf dir. Default is ${YARN_HOME}/conf.
# YARN_LOG_DIR Where log files are stored. PWD by default.
# YARN_MASTER host:path where hadoop code should be rsync'd from
# YARN_PID_DIR The pid files are stored. /tmp by default.
# YARN_IDENT_STRING A string representing this instance of hadoop. $USER by default
# YARN_NICENESS The scheduling priority for daemons. Defaults to 0.
# HADOOP_CONF_DIR Alternate conf dir. Default is ${HADOOP_MAPRED_HOME}/conf.
# HADOOP_MAPRED_PID_DIR The pid files are stored. /tmp by default.
# HADOOP_MAPRED_NICENESS The scheduling priority for daemons. Defaults to 0.
##
usage="Usage: mr-jobhistory-daemon.sh [--config <conf-dir>] [--hosts hostlistfile] (start|stop) <mapred-command> "
usage="Usage: mr-jobhistory-daemon.sh [--config <conf-dir>] (start|stop) <mapred-command> "
# if no args specified, show usage
if [ $# -le 1 ]; then
@ -42,10 +36,6 @@ fi
bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/yarn-config.sh
# get arguments
startStop=$1
shift
@ -69,43 +59,47 @@ hadoop_rotate_log ()
fi
}
if [ -f "${YARN_CONF_DIR}/yarn-env.sh" ]; then
. "${YARN_CONF_DIR}/yarn-env.sh"
if [ "$HADOOP_MAPRED_IDENT_STRING" = "" ]; then
export HADOOP_MAPRED_IDENT_STRING="$USER"
fi
if [ "$YARN_IDENT_STRING" = "" ]; then
export YARN_IDENT_STRING="$USER"
fi
# get log directory
if [ "$YARN_LOG_DIR" = "" ]; then
export YARN_LOG_DIR="$YARN_HOME/logs"
fi
mkdir -p "$YARN_LOG_DIR"
chown $YARN_IDENT_STRING $YARN_LOG_DIR
if [ "$YARN_PID_DIR" = "" ]; then
YARN_PID_DIR=/tmp
fi
# some variables
export HADOOP_LOGFILE=yarn-$YARN_IDENT_STRING-$command-$HOSTNAME.log
export HADOOP_ROOT_LOGGER=${HADOOP_ROOT_LOGGER:-INFO,RFA}
export HADOOP_MAPRED_HOME=${HADOOP_MAPRED_HOME:-${HADOOP_PREFIX}}
export HADOOP_MAPRED_LOGFILE=mapred-$HADOOP_MAPRED_IDENT_STRING-$command-$HOSTNAME.log
export HADOOP_MAPRED_ROOT_LOGGER=${HADOOP_MAPRED_ROOT_LOGGER:-INFO,RFA}
export HADOOP_JHS_LOGGER=${HADOOP_JHS_LOGGER:-INFO,JSA}
log=$YARN_LOG_DIR/yarn-$YARN_IDENT_STRING-$command-$HOSTNAME.out
pid=$YARN_PID_DIR/yarn-$YARN_IDENT_STRING-$command.pid
YARN_STOP_TIMEOUT=${YARN_STOP_TIMEOUT:-5}
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/mapred-config.sh
if [ -f "${HADOOP_CONF_DIR}/mapred-env.sh" ]; then
. "${HADOOP_CONF_DIR}/mapred-env.sh"
fi
mkdir -p "$HADOOP_MAPRED_LOG_DIR"
chown $HADOOP_MAPRED_IDENT_STRING $HADOOP_MAPRED_LOG_DIR
if [ "$HADOOP_MAPRED_PID_DIR" = "" ]; then
HADOOP_MAPRED_PID_DIR=/tmp
fi
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_MAPRED_IDENT_STRING"
log=$HADOOP_MAPRED_LOG_DIR/mapred-$HADOOP_MAPRED_IDENT_STRING-$command-$HOSTNAME.out
pid=$HADOOP_MAPRED_PID_DIR/mapred-$HADOOP_MAPRED_IDENT_STRING-$command.pid
HADOOP_MAPRED_STOP_TIMEOUT=${HADOOP_MAPRED_STOP_TIMEOUT:-5}
# Set default scheduling priority
if [ "$YARN_NICENESS" = "" ]; then
export YARN_NICENESS=0
if [ "$HADOOP_MAPRED_NICENESS" = "" ]; then
export HADOOP_MAPRED_NICENESS=0
fi
case $startStop in
(start)
mkdir -p "$YARN_PID_DIR"
mkdir -p "$HADOOP_MAPRED_PID_DIR"
if [ -f $pid ]; then
if kill -0 `cat $pid` > /dev/null 2>&1; then
@ -114,15 +108,10 @@ case $startStop in
fi
fi
if [ "$YARN_MASTER" != "" ]; then
echo rsync from $YARN_MASTER
rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $YARN_MASTER/ "$YARN_HOME"
fi
hadoop_rotate_log $log
echo starting $command, logging to $log
cd "$YARN_HOME"
nohup nice -n $YARN_NICENESS "$YARN_HOME"/bin/mapred --config $YARN_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
cd "$HADOOP_MAPRED_HOME"
nohup nice -n $HADOOP_MAPRED_NICENESS "$HADOOP_MAPRED_HOME"/bin/mapred --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
echo $! > $pid
sleep 1; head "$log"
;;
@ -134,9 +123,9 @@ case $startStop in
if kill -0 $TARGET_PID > /dev/null 2>&1; then
echo stopping $command
kill $TARGET_PID
sleep $YARN_STOP_TIMEOUT
sleep $HADOOP_MAPRED_STOP_TIMEOUT
if kill -0 $TARGET_PID > /dev/null 2>&1; then
echo "$command did not stop gracefully after $YARN_STOP_TIMEOUT seconds: killing with kill -9"
echo "$command did not stop gracefully after $HADOOP_MAPRED_STOP_TIMEOUT seconds: killing with kill -9"
kill -9 $TARGET_PID
fi
else

View File

@ -0,0 +1,27 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# export JAVA_HOME=/home/y/libexec/jdk1.6.0/
export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000
export HADOOP_MAPRED_ROOT_LOGGER=INFO,RFA
#export HADOOP_JOB_HISTORYSERVER_OPTS=
#export HADOOP_MAPRED_LOG_DIR="" # Where log files are stored. $HADOOP_MAPRED_HOME/logs by default.
#export HADOOP_JHS_LOGGER=INFO,RFA # Hadoop JobSummary logger.
#export HADOOP_MAPRED_PID_DIR= # The pid files are stored. /tmp by default.
#export HADOOP_MAPRED_IDENT_STRING= #A string representing this instance of hadoop. $USER by default
#export HADOOP_MAPRED_NICENESS= #The scheduling priority for daemons. Defaults to 0.

View File

@ -102,7 +102,7 @@
<property><!--Loaded from job.xml--><name>dfs.permissions.enabled</name><value>true</value></property>
<property><!--Loaded from job.xml--><name>mapreduce.tasktracker.taskcontroller</name><value>org.apache.hadoop.mapred.DefaultTaskController</value></property>
<property><!--Loaded from job.xml--><name>mapreduce.reduce.shuffle.parallelcopies</name><value>5</value></property>
<property><!--Loaded from job.xml--><name>yarn.nodemanager.env-whitelist</name><value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,YARN_HOME</value></property>
<property><!--Loaded from job.xml--><name>yarn.nodemanager.env-whitelist</name><value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,HADOOP_YARN_HOME</value></property>
<property><!--Loaded from job.xml--><name>mapreduce.jobtracker.heartbeats.in.second</name><value>100</value></property>
<property><!--Loaded from job.xml--><name>mapreduce.job.maxtaskfailures.per.tracker</name><value>4</value></property>
<property><!--Loaded from job.xml--><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
@ -317,8 +317,8 @@
$HADOOP_COMMON_HOME/share/hadoop/common/lib/*,
$HADOOP_HDFS_HOME/share/hadoop/hdfs/*,
$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*,
$YARN_HOME/share/hadoop/mapreduce/*,
$YARN_HOME/share/hadoop/mapreduce/lib/*
$HADOOP_YARN_HOME/share/hadoop/mapreduce/*,
$HADOOP_YARN_HOME/share/hadoop/mapreduce/lib/*
</value></property>
<property><!--Loaded from job.xml--><name>yarn.nodemanager.log-aggregation.compression-type</name><value>gz</value></property>
<property><!--Loaded from job.xml--><name>dfs.image.compress</name><value>false</value></property>