diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index b404c38e03..1f9d534702 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -159,6 +159,9 @@ Release 0.23.2 - UNRELEASED MAPREDUCE-3497. Added docs for YARN CLI. (tgraves via acmurthy) + MAPREDUCE-3954. Added new envs to separate heap size for different daemons + started via bin scripts. (Robert Joseph Evans via vinodkv) + OPTIMIZATIONS MAPREDUCE-3901. Modified JobHistory records in YARN to lazily load job and diff --git a/hadoop-mapreduce-project/bin/mapred b/hadoop-mapreduce-project/bin/mapred index eb13e60782..deb9cafea6 100755 --- a/hadoop-mapreduce-project/bin/mapred +++ b/hadoop-mapreduce-project/bin/mapred @@ -51,8 +51,6 @@ fi COMMAND=$1 shift -HADOOP_JOB_HISTORYSERVER_OPTS="-Dmapred.jobsummary.logger=${HADOOP_JHS_LOGGER:-INFO,console}" - if [ "$COMMAND" = "job" ] ; then CLASS=org.apache.hadoop.mapred.JobClient elif [ "$COMMAND" = "queue" ] ; then @@ -70,7 +68,10 @@ elif [ "$COMMAND" = "groups" ] ; then HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" elif [ "$COMMAND" = "historyserver" ] ; then CLASS=org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer - HADOOP_OPTS="$HADOOP_OPTS $HADOOP_JOB_HISTORYSERVER_OPTS" + HADOOP_OPTS="$HADOOP_OPTS -Dmapred.jobsummary.logger=${HADOOP_JHS_LOGGER:-INFO,console} $HADOOP_JOB_HISTORYSERVER_OPTS" + if [ "$HADOOP_JOB_HISTORYSERVER_HEAPSIZE" != "" ]; then + JAVA_HEAP_MAX="-Xmx""$HADOOP_JOB_HISTORYSERVER_HEAPSIZE""m" + fi elif [ "$COMMAND" = "mradmin" ] \ || [ "$COMMAND" = "jobtracker" ] \ || [ "$COMMAND" = "tasktracker" ] ; then diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn index fe8006101d..6f5cf76cdc 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn @@ -27,11 +27,16 @@ # YARN_HEAPSIZE The maximum amount of heap to use, in MB. # Default is 1000. 
# +# YARN_{COMMAND}_HEAPSIZE overrides YARN_HEAPSIZE for a given command +# eg YARN_NODEMANAGER_HEAPSIZE sets the heap +# size for the NodeManager. If you set the +# heap size in YARN_{COMMAND}_OPTS or YARN_OPTS +# they take precedence. +# # YARN_OPTS Extra Java runtime options. # -# YARN_NAMENODE_OPTS These options are added to YARN_OPTS # YARN_CLIENT_OPTS when the respective command is run. -# YARN_{COMMAND}_OPTS etc YARN_JT_OPTS applies to JobTracker +# YARN_{COMMAND}_OPTS etc YARN_NODEMANAGER_OPTS applies to NodeManager # for e.g. YARN_CLIENT_OPTS applies to # more than one command (fs, dfs, fsck, # dfsadmin etc) @@ -170,6 +175,9 @@ elif [ "$COMMAND" = "resourcemanager" ] ; then CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/rm-config/log4j.properties CLASS='org.apache.hadoop.yarn.server.resourcemanager.ResourceManager' YARN_OPTS="$YARN_OPTS $YARN_RESOURCEMANAGER_OPTS" + if [ "$YARN_RESOURCEMANAGER_HEAPSIZE" != "" ]; then + JAVA_HEAP_MAX="-Xmx""$YARN_RESOURCEMANAGER_HEAPSIZE""m" + fi elif [ "$COMMAND" = "nodemanager" ] ; then CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/nm-config/log4j.properties CLASS='org.apache.hadoop.yarn.server.nodemanager.NodeManager' @@ -178,9 +186,15 @@ elif [ "$COMMAND" = "nodemanager" ] ; then else YARN_OPTS="$YARN_OPTS -server $YARN_NODEMANAGER_OPTS" fi + if [ "$YARN_NODEMANAGER_HEAPSIZE" != "" ]; then + JAVA_HEAP_MAX="-Xmx""$YARN_NODEMANAGER_HEAPSIZE""m" + fi elif [ "$COMMAND" = "proxyserver" ] ; then CLASS='org.apache.hadoop.yarn.server.webproxy.WebAppProxyServer' YARN_OPTS="$YARN_OPTS $YARN_PROXYSERVER_OPTS" + if [ "$YARN_PROXYSERVER_HEAPSIZE" != "" ]; then + JAVA_HEAP_MAX="-Xmx""$YARN_PROXYSERVER_HEAPSIZE""m" + fi elif [ "$COMMAND" = "version" ] ; then CLASS=org.apache.hadoop.util.VersionInfo YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS" diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ClusterSetup.apt.vm b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ClusterSetup.apt.vm index 
abdbe894d9..1eceaa0f56 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ClusterSetup.apt.vm +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ClusterSetup.apt.vm @@ -103,6 +103,9 @@ Hadoop MapReduce Next Generation - Cluster Setup *--------------------------------------+--------------------------------------+ | WebAppProxy | YARN_PROXYSERVER_OPTS | *--------------------------------------+--------------------------------------+ +| Map Reduce Job History Server | HADOOP_JOB_HISTORYSERVER_OPTS | +*--------------------------------------+--------------------------------------+ + For example, To configure Namenode to use parallelGC, the following statement should be added in hadoop-env.sh : @@ -118,10 +121,22 @@ Hadoop MapReduce Next Generation - Cluster Setup don't exist. * <<<HADOOP_HEAPSIZE>>> / <<<YARN_HEAPSIZE>>> - The maximum amount of - heapsize to use, in MB e.g. 1000MB. This is used to configure the heap - size for the daemon. By default, the value is 1000MB. - - + heapsize to use, in MB e.g. if the variable is set to 1000 the heap + will be set to 1000MB. This is used to configure the heap + size for the daemon. By default, the value is 1000. If you want to + configure the values separately for each daemon you can use:
+*--------------------------------------+--------------------------------------+ +|| Daemon || Environment Variable | +*--------------------------------------+--------------------------------------+ +| ResourceManager | YARN_RESOURCEMANAGER_HEAPSIZE | +*--------------------------------------+--------------------------------------+ +| NodeManager | YARN_NODEMANAGER_HEAPSIZE | +*--------------------------------------+--------------------------------------+ +| WebAppProxy | YARN_PROXYSERVER_HEAPSIZE | +*--------------------------------------+--------------------------------------+ +| Map Reduce Job History Server | HADOOP_JOB_HISTORYSERVER_HEAPSIZE | +*--------------------------------------+--------------------------------------+ + * {Configuring the Hadoop Daemons in Non-Secure Mode} This section deals with important parameters to be specified in