#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

bin=`dirname "$0"`
bin=`cd "$bin"; pwd`

if [ "$HADOOP_HOME" != "" ]; then
  echo "Warning: \$HADOOP_HOME is deprecated."
  echo
fi

. "$bin"/../libexec/hadoop-config.sh
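
# Illustrative invocations (hypothetical file name; the flags are defined in
# usage() below and parsed with getopt further down):
#   ./hadoop-setup-conf.sh --default
#   ./hadoop-setup-conf.sh --auto --conf-dir=/etc/hadoop --log-dir=/var/log/hadoop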

usage() {
  echo "
usage: $0 <parameters>

Optional parameters:
  --auto                                                           Setup automatically
  --default                                                        Generate default config
  --conf-dir=/etc/hadoop                                           Set config directory
  --datanode-dir=/var/lib/hadoop/hdfs/datanode                     Set datanode directory
  -h                                                               Display this message
  --jobtracker-url=hostname:9001                                   Set jobtracker url
  --log-dir=/var/log/hadoop                                        Set log directory
  --hdfs-dir=/var/lib/hadoop/hdfs                                  Set hdfs directory
  --mapred-dir=/var/lib/hadoop/mapred                              Set mapreduce directory
  --namenode-dir=/var/lib/hadoop/hdfs/namenode                     Set namenode directory
  --namenode-url=hdfs://hostname:9000/                             Set namenode url
  --replication=3                                                  Set replication factor
  --taskscheduler=org.apache.hadoop.mapred.JobQueueTaskScheduler   Set task scheduler
  "
  exit 1
}
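
# Expand ${VARIABLE}-style placeholders in template file $1 using the shell
# variables of the same name, appending each expanded line to output file $2.
# Illustrative example: with HADOOP_NN_HOST=hdfs://host:9000/, a template line
#   <value>${HADOOP_NN_HOST}</value>
# is written out as
#   <value>hdfs://host:9000/</value>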
template_generator() {
  REGEX='(\$\{[a-zA-Z_][a-zA-Z_0-9]*\})'
  cat $1 |
  while read line ; do
    while [[ "$line" =~ $REGEX ]] ; do
      LHS=${BASH_REMATCH[1]}
      RHS="$(eval echo "\"$LHS\"")"
      line=${line//$LHS/$RHS}
    done
    echo $line >> $2
  done
}
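
# Parse command-line options with getopt(1); a parse failure prints usage and exits.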
OPTS=$(getopt \
  -n $0 \
  -o '' \
  -l 'auto' \
  -l 'conf-dir:' \
  -l 'default' \
  -l 'hdfs-dir:' \
  -l 'namenode-dir:' \
  -l 'datanode-dir:' \
  -l 'mapred-dir:' \
  -l 'namenode-url:' \
  -l 'jobtracker-url:' \
  -l 'log-dir:' \
  -l 'replication:' \
  -l 'taskscheduler:' \
  -o 'h' \
  -- "$@")

if [ $? != 0 ] ; then
  usage
fi

# Make sure the HADOOP_LOG_DIR is not picked up from user environment.
unset HADOOP_LOG_DIR

# Parse script parameters
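# Each configuration option also sets AUTOMATED=1, which skips the interactive
# wizard below; --auto additionally enables AUTOSETUP.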
eval set -- "${OPTS}"
while true ; do
  case "$1" in
    --auto)
      AUTOSETUP=1
      AUTOMATED=1
      shift
      ;;
    --conf-dir)
      HADOOP_CONF_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --default)
      AUTOMATED=1; shift
      ;;
    -h)
      usage
      ;;
    --hdfs-dir)
      HADOOP_HDFS_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --namenode-dir)
      HADOOP_NN_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --datanode-dir)
      HADOOP_DN_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --mapred-dir)
      HADOOP_MAPRED_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --namenode-url)
      HADOOP_NN_HOST=$2; shift 2
      AUTOMATED=1
      ;;
    --jobtracker-url)
      HADOOP_JT_HOST=$2; shift 2
      AUTOMATED=1
      ;;
    --log-dir)
      HADOOP_LOG_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --replication)
      HADOOP_REPLICATION=$2; shift 2
      AUTOMATED=1
      ;;
    --taskscheduler)
      HADOOP_TASK_SCHEDULER=$2; shift 2
      AUTOMATED=1
      ;;
    --)
      shift ; break
      ;;
    *)
      echo "Unknown option: $1"
      usage
      exit 1
      ;;
  esac
done

# Fill in default values, if parameters have not been defined.
AUTOSETUP=${AUTOSETUP:-1}
JAVA_HOME=${JAVA_HOME:-/usr/java/default}
HADOOP_NN_HOST=${HADOOP_NN_HOST:-hdfs://`hostname`:9000/}
HADOOP_NN_DIR=${HADOOP_NN_DIR:-/var/lib/hadoop/hdfs/namenode}
HADOOP_DN_DIR=${HADOOP_DN_DIR:-/var/lib/hadoop/hdfs/datanode}
HADOOP_JT_HOST=${HADOOP_JT_HOST:-`hostname`:9001}
HADOOP_HDFS_DIR=${HADOOP_HDFS_DIR:-/var/lib/hadoop/hdfs}
HADOOP_MAPRED_DIR=${HADOOP_MAPRED_DIR:-/var/lib/hadoop/mapred}
HADOOP_LOG_DIR=${HADOOP_LOG_DIR:-/var/log/hadoop}
HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop}
HADOOP_REPLICATION=${HADOOP_REPLICATION:-3}
HADOOP_TASK_SCHEDULER=${HADOOP_TASK_SCHEDULER:-org.apache.hadoop.mapred.JobQueueTaskScheduler}

# Interactive setup wizard
if [ "${AUTOMATED}" != "1" ]; then
  echo "Setup Hadoop Configuration"
  echo
  echo -n "Where would you like to put config directory? (${HADOOP_CONF_DIR}) "
  read USER_HADOOP_CONF_DIR
  echo -n "Where would you like to put log directory? (${HADOOP_LOG_DIR}) "
  read USER_HADOOP_LOG_DIR
  echo -n "What is the url of the namenode? (${HADOOP_NN_HOST}) "
  read USER_HADOOP_NN_HOST
  echo -n "Where would you like to put namenode data directory? (${HADOOP_NN_DIR}) "
  read USER_HADOOP_NN_DIR
  echo -n "Where would you like to put datanode data directory? (${HADOOP_DN_DIR}) "
  read USER_HADOOP_DN_DIR
  echo -n "What is the url of the jobtracker? (${HADOOP_JT_HOST}) "
  read USER_HADOOP_JT_HOST
  echo -n "Where would you like to put jobtracker/tasktracker data directory? (${HADOOP_MAPRED_DIR}) "
  read USER_HADOOP_MAPRED_DIR
  echo -n "Which taskscheduler would you like? (${HADOOP_TASK_SCHEDULER}) "
  read USER_HADOOP_TASK_SCHEDULER
  echo -n "Where is JAVA_HOME directory? (${JAVA_HOME}) "
  read USER_JAVA_HOME
  echo -n "Would you like to create directories/copy conf files to localhost? (Y/n) "
  read USER_AUTOSETUP
  echo
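  # Apply the user's answers, falling back to the defaults shown in each prompt.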
  JAVA_HOME=${USER_JAVA_HOME:-$JAVA_HOME}
  HADOOP_NN_HOST=${USER_HADOOP_NN_HOST:-$HADOOP_NN_HOST}
  HADOOP_NN_DIR=${USER_HADOOP_NN_DIR:-$HADOOP_NN_DIR}
  HADOOP_DN_DIR=${USER_HADOOP_DN_DIR:-$HADOOP_DN_DIR}
  HADOOP_JT_HOST=${USER_HADOOP_JT_HOST:-$HADOOP_JT_HOST}
  HADOOP_HDFS_DIR=${USER_HADOOP_HDFS_DIR:-$HADOOP_HDFS_DIR}
  HADOOP_MAPRED_DIR=${USER_HADOOP_MAPRED_DIR:-$HADOOP_MAPRED_DIR}
  HADOOP_TASK_SCHEDULER=${USER_HADOOP_TASK_SCHEDULER:-$HADOOP_TASK_SCHEDULER}
  HADOOP_LOG_DIR=${USER_HADOOP_LOG_DIR:-$HADOOP_LOG_DIR}
  HADOOP_CONF_DIR=${USER_HADOOP_CONF_DIR:-$HADOOP_CONF_DIR}
  AUTOSETUP=${USER_AUTOSETUP:-y}
  echo "Review your choices:"
  echo
  echo "Config directory             : ${HADOOP_CONF_DIR}"
  echo "Log directory                : ${HADOOP_LOG_DIR}"
  echo "Namenode url                 : ${HADOOP_NN_HOST}"
  echo "Namenode directory           : ${HADOOP_NN_DIR}"
  echo "Datanode directory           : ${HADOOP_DN_DIR}"
  echo "Jobtracker url               : ${HADOOP_JT_HOST}"
  echo "Mapreduce directory          : ${HADOOP_MAPRED_DIR}"
  echo "Task scheduler               : ${HADOOP_TASK_SCHEDULER}"
  echo "JAVA_HOME directory          : ${JAVA_HOME}"
  echo "Create dirs/copy conf files  : ${AUTOSETUP}"
  echo
  echo -n "Proceed with generating the configuration? (y/N) "
  read CONFIRM
  if [ "${CONFIRM}" != "y" ]; then
    echo "User aborted setup, exiting..."
    exit 1
  fi
fi

if [ "${AUTOSETUP}" == "1" ]; then
  # If the user chose to set up the local system automatically,
  # generate the config files directly in HADOOP_CONF_DIR.
  DEST=${HADOOP_CONF_DIR}
else
  # If the user only wants to generate the config files,
  # place them in the current working directory.
  DEST=`pwd`
fi

# Remove any existing config files from the destination directory;
# template_generator appends to its output file.
rm -f ${DEST}/core-site.xml >/dev/null
rm -f ${DEST}/hdfs-site.xml >/dev/null
rm -f ${DEST}/mapred-site.xml >/dev/null
rm -f ${DEST}/hadoop-env.sh >/dev/null

# Generate config files with the specified parameters.
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/core-site.xml ${DEST}/core-site.xml
template_generator ${HADOOP_PREFIX}/share/hadoop/hdfs/templates/hdfs-site.xml ${DEST}/hdfs-site.xml
template_generator ${HADOOP_PREFIX}/share/hadoop/mapreduce/templates/mapred-site.xml ${DEST}/mapred-site.xml
template_generator ${HADOOP_CONF_DIR}/hadoop-env.sh.template ${DEST}/hadoop-env.sh
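
# Make the generated hadoop-env.sh owned by root:hadoop with mode 755.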
chown root:hadoop ${DEST}/hadoop-env.sh
chmod 755 ${DEST}/hadoop-env.sh

# Set up directory paths and copy config files, if AUTOSETUP is chosen.
if [ "${AUTOSETUP}" == "1" -o "${AUTOSETUP}" == "y" ]; then
  mkdir -p ${HADOOP_HDFS_DIR}
  mkdir -p ${HADOOP_NN_DIR}
  mkdir -p ${HADOOP_DN_DIR}
  mkdir -p ${HADOOP_MAPRED_DIR}
  mkdir -p ${HADOOP_CONF_DIR}
  mkdir -p ${HADOOP_LOG_DIR}
  mkdir -p ${HADOOP_LOG_DIR}/hdfs
  mkdir -p ${HADOOP_LOG_DIR}/mapred
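  # Give the HDFS directories to hdfs:hadoop, the MapReduce directory to
  # mapred:hadoop, and keep the shared log directory group-writable.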
  chown hdfs:hadoop ${HADOOP_HDFS_DIR}
  chown hdfs:hadoop ${HADOOP_NN_DIR}
  chown hdfs:hadoop ${HADOOP_DN_DIR}
  chown mapred:hadoop ${HADOOP_MAPRED_DIR}
  chown root:hadoop ${HADOOP_LOG_DIR}
  chmod 775 ${HADOOP_LOG_DIR}
  chown hdfs:hadoop ${HADOOP_LOG_DIR}/hdfs
  chown mapred:hadoop ${HADOOP_LOG_DIR}/mapred
  echo "Configuration setup is complete."
  if [[ "$HADOOP_NN_HOST" =~ "`hostname`" ]]; then
    echo "Proceed to run hadoop-setup-hdfs.sh on namenode."
  fi
else
  echo
  echo "Configuration files have been generated; please copy:"
  echo
  echo "core-site.xml"
  echo "hdfs-site.xml"
  echo "mapred-site.xml"
  echo "hadoop-env.sh"
  echo
  echo " to ${HADOOP_CONF_DIR} on all nodes, and proceed to run hadoop-setup-hdfs.sh on namenode."
fi