#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Generate Hadoop configuration files (core-site.xml, hdfs-site.xml,
# mapred-site.xml, hadoop-env.sh) from templates, driven either by
# command-line flags or by an interactive wizard, and optionally create
# and chown the local directory layout (--auto / answering "y").

bin=$(dirname "$0")
bin=$(cd "$bin"; pwd)

if [ "$HADOOP_HOME" != "" ]; then
  echo "Warning: \$HADOOP_HOME is deprecated."
  echo
fi

. "$bin"/../libexec/hadoop-config.sh

# Print usage and exit non-zero.
usage() {
  echo "
usage: $0
  Optional parameters:
     --auto                                                          Setup automatically
     --default                                                       Generate default config
     --conf-dir=/etc/hadoop                                          Set config directory
     --datanode-dir=/var/lib/hadoop/hdfs/datanode                    Set datanode directory
     -h                                                              Display this message
     --jobtracker-url=hostname:9001                                  Set jobtracker url
     --log-dir=/var/log/hadoop                                       Set log directory
     --hdfs-dir=/var/lib/hadoop/hdfs                                 Set hdfs directory
     --mapred-dir=/var/lib/hadoop/mapred                             Set mapreduce directory
     --namenode-dir=/var/lib/hadoop/hdfs/namenode                    Set namenode directory
     --namenode-url=hdfs://hostname:9000/                            Set namenode url
     --replication=3                                                 Set replication factor
     --taskscheduler=org.apache.hadoop.mapred.JobQueueTaskScheduler  Set task scheduler
  "
  exit 1
}

# Expand ${VAR}-style placeholders in template file $1 against the current
# environment and append the result to $2.
# NOTE(review): expansion goes through eval, so template contents are
# trusted input; the unquoted 'read'/'echo' also collapse whitespace in the
# generated file, which is the historical behavior and is kept as-is.
template_generator() {
  REGEX='(\$\{[a-zA-Z_][a-zA-Z_0-9]*\})'
  cat $1 |
  while read line ; do
    while [[ "$line" =~ $REGEX ]] ; do
      LHS=${BASH_REMATCH[1]}
      RHS="$(eval echo "\"$LHS\"")"
      line=${line//$LHS/$RHS}
    done
    echo $line >> $2
  done
}

OPTS=$(getopt \
  -n $0 \
  -o '' \
  -l 'auto' \
  -l 'conf-dir:' \
  -l 'default' \
  -l 'hdfs-dir:' \
  -l 'namenode-dir:' \
  -l 'datanode-dir:' \
  -l 'mapred-dir:' \
  -l 'namenode-url:' \
  -l 'jobtracker-url:' \
  -l 'log-dir:' \
  -l 'replication:' \
  -l 'taskscheduler:' \
  -o 'h' \
  -- "$@")

if [ $? != 0 ] ; then
  usage
fi

# Make sure the HADOOP_LOG_DIR is not picked up from user environment.
unset HADOOP_LOG_DIR

# Parse script parameters
eval set -- "${OPTS}"
while true ; do
  case "$1" in
    --auto)
      AUTOSETUP=1
      AUTOMATED=1
      shift
      ;;
    --conf-dir)
      HADOOP_CONF_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --default)
      AUTOMATED=1; shift
      ;;
    -h)
      usage
      ;;
    --hdfs-dir)
      HADOOP_HDFS_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --namenode-dir)
      HADOOP_NN_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --datanode-dir)
      HADOOP_DN_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --mapred-dir)
      HADOOP_MAPRED_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --namenode-url)
      HADOOP_NN_HOST=$2; shift 2
      AUTOMATED=1
      ;;
    --jobtracker-url)
      HADOOP_JT_HOST=$2; shift 2
      AUTOMATED=1
      ;;
    --log-dir)
      HADOOP_LOG_DIR=$2; shift 2
      AUTOMATED=1
      ;;
    --replication)
      HADOOP_REPLICATION=$2; shift 2
      AUTOMATED=1
      ;;
    --taskscheduler)
      HADOOP_TASK_SCHEDULER=$2; shift 2
      AUTOMATED=1
      ;;
    --)
      shift ; break
      ;;
    *)
      echo "Unknown option: $1"
      usage
      exit 1
      ;;
  esac
done

# Fill in default values, if parameters have not been defined.
AUTOSETUP=${AUTOSETUP:-1}
JAVA_HOME=${JAVA_HOME:-/usr/java/default}
HADOOP_NN_HOST=${HADOOP_NN_HOST:-hdfs://$(hostname):9000/}
HADOOP_NN_DIR=${HADOOP_NN_DIR:-/var/lib/hadoop/hdfs/namenode}
HADOOP_DN_DIR=${HADOOP_DN_DIR:-/var/lib/hadoop/hdfs/datanode}
HADOOP_JT_HOST=${HADOOP_JT_HOST:-$(hostname):9001}
HADOOP_HDFS_DIR=${HADOOP_HDFS_DIR:-/var/lib/hadoop/hdfs}
HADOOP_MAPRED_DIR=${HADOOP_MAPRED_DIR:-/var/lib/hadoop/mapred}
HADOOP_LOG_DIR=${HADOOP_LOG_DIR:-/var/log/hadoop}
HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop}
# FIX: was ${HADOOP_RELICATION:-3} (typo), which silently discarded the
# value set by --replication and always used 3.
HADOOP_REPLICATION=${HADOOP_REPLICATION:-3}
HADOOP_TASK_SCHEDULER=${HADOOP_TASK_SCHEDULER:-org.apache.hadoop.mapred.JobQueueTaskScheduler}

# Interactive setup wizard
if [ "${AUTOMATED}" != "1" ]; then
  echo "Setup Hadoop Configuration"
  echo
  echo -n "Where would you like to put config directory? (${HADOOP_CONF_DIR}) "
  read USER_HADOOP_CONF_DIR
  echo -n "Where would you like to put log directory? (${HADOOP_LOG_DIR}) "
  read USER_HADOOP_LOG_DIR
  echo -n "What is the url of the namenode? (${HADOOP_NN_HOST}) "
  read USER_HADOOP_NN_HOST
  echo -n "Where would you like to put namenode data directory? (${HADOOP_NN_DIR}) "
  read USER_HADOOP_NN_DIR
  echo -n "Where would you like to put datanode data directory? (${HADOOP_DN_DIR}) "
  read USER_HADOOP_DN_DIR
  echo -n "What is the url of the jobtracker? (${HADOOP_JT_HOST}) "
  read USER_HADOOP_JT_HOST
  # FIX: USER_HADOOP_HDFS_DIR was consumed below but never prompted for;
  # ask for it so the wizard answer is actually honored.
  echo -n "Where would you like to put hdfs data directory? (${HADOOP_HDFS_DIR}) "
  read USER_HADOOP_HDFS_DIR
  echo -n "Where would you like to put jobtracker/tasktracker data directory? (${HADOOP_MAPRED_DIR}) "
  read USER_HADOOP_MAPRED_DIR
  echo -n "Which taskscheduler would you like? (${HADOOP_TASK_SCHEDULER}) "
  read USER_HADOOP_TASK_SCHEDULER
  echo -n "Where is JAVA_HOME directory? (${JAVA_HOME}) "
  read USER_JAVA_HOME
  echo -n "Would you like to create directories/copy conf files to localhost? (Y/n) "
  read USER_AUTOSETUP
  echo
  # FIX: was ${USER_USER_JAVA_HOME:-...} (typo); the JAVA_HOME answer was
  # always ignored.
  JAVA_HOME=${USER_JAVA_HOME:-$JAVA_HOME}
  HADOOP_NN_HOST=${USER_HADOOP_NN_HOST:-$HADOOP_NN_HOST}
  HADOOP_NN_DIR=${USER_HADOOP_NN_DIR:-$HADOOP_NN_DIR}
  HADOOP_DN_DIR=${USER_HADOOP_DN_DIR:-$HADOOP_DN_DIR}
  HADOOP_JT_HOST=${USER_HADOOP_JT_HOST:-$HADOOP_JT_HOST}
  HADOOP_HDFS_DIR=${USER_HADOOP_HDFS_DIR:-$HADOOP_HDFS_DIR}
  HADOOP_MAPRED_DIR=${USER_HADOOP_MAPRED_DIR:-$HADOOP_MAPRED_DIR}
  # FIX: was ${HADOOP_TASK_SCHEDULER:-...}; the scheduler answer was
  # always ignored.
  HADOOP_TASK_SCHEDULER=${USER_HADOOP_TASK_SCHEDULER:-$HADOOP_TASK_SCHEDULER}
  HADOOP_LOG_DIR=${USER_HADOOP_LOG_DIR:-$HADOOP_LOG_DIR}
  HADOOP_CONF_DIR=${USER_HADOOP_CONF_DIR:-$HADOOP_CONF_DIR}
  AUTOSETUP=${USER_AUTOSETUP:-y}
  echo "Review your choices:"
  echo
  echo "Config directory            : ${HADOOP_CONF_DIR}"
  echo "Log directory               : ${HADOOP_LOG_DIR}"
  echo "Namenode url                : ${HADOOP_NN_HOST}"
  echo "Namenode directory          : ${HADOOP_NN_DIR}"
  echo "Datanode directory          : ${HADOOP_DN_DIR}"
  echo "Jobtracker url              : ${HADOOP_JT_HOST}"
  echo "Mapreduce directory         : ${HADOOP_MAPRED_DIR}"
  echo "Task scheduler              : ${HADOOP_TASK_SCHEDULER}"
  echo "JAVA_HOME directory         : ${JAVA_HOME}"
  echo "Create dirs/copy conf files : ${AUTOSETUP}"
  echo
  echo -n "Proceed with generate configuration? (y/N) "
  read CONFIRM
  if [ "${CONFIRM}" != "y" ]; then
    echo "User aborted setup, exiting..."
    exit 1
  fi
fi

if [ "${AUTOSETUP}" == "1" ]; then
  # If user wants to setup local system automatically,
  # set config file generation location to HADOOP_CONF_DIR.
  DEST=${HADOOP_CONF_DIR}
else
  # If user is only interested to generate config file locally,
  # place config files in the current working directory.
  DEST=$(pwd)
fi

# Remove existing config files; template_generator appends, so stale
# content must not be left behind.
rm -f ${DEST}/core-site.xml >/dev/null
rm -f ${DEST}/hdfs-site.xml >/dev/null
rm -f ${DEST}/mapred-site.xml >/dev/null
rm -f ${DEST}/hadoop-env.sh >/dev/null

# Generate config file with specified parameters.
template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/core-site.xml ${DEST}/core-site.xml
template_generator ${HADOOP_PREFIX}/share/hadoop/hdfs/templates/hdfs-site.xml ${DEST}/hdfs-site.xml
template_generator ${HADOOP_PREFIX}/share/hadoop/mapreduce/templates/mapred-site.xml ${DEST}/mapred-site.xml
template_generator ${HADOOP_CONF_DIR}/hadoop-env.sh.template ${DEST}/hadoop-env.sh

chown root:hadoop ${DEST}/hadoop-env.sh
chmod 755 ${DEST}/hadoop-env.sh

# Setup directory path and copy config files, if AUTOSETUP is chosen.
# FIX: the wizard advertises "(Y/n)", so accept capital "Y" as well.
if [ "${AUTOSETUP}" == "1" -o "${AUTOSETUP}" == "y" -o "${AUTOSETUP}" == "Y" ]; then
  mkdir -p ${HADOOP_HDFS_DIR}
  mkdir -p ${HADOOP_NN_DIR}
  mkdir -p ${HADOOP_DN_DIR}
  mkdir -p ${HADOOP_MAPRED_DIR}
  mkdir -p ${HADOOP_CONF_DIR}
  mkdir -p ${HADOOP_LOG_DIR}
  mkdir -p ${HADOOP_LOG_DIR}/hdfs
  mkdir -p ${HADOOP_LOG_DIR}/mapred
  chown hdfs:hadoop ${HADOOP_HDFS_DIR}
  chown hdfs:hadoop ${HADOOP_NN_DIR}
  chown hdfs:hadoop ${HADOOP_DN_DIR}
  chown mapred:hadoop ${HADOOP_MAPRED_DIR}
  chown root:hadoop ${HADOOP_LOG_DIR}
  chmod 775 ${HADOOP_LOG_DIR}
  chown hdfs:hadoop ${HADOOP_LOG_DIR}/hdfs
  chown mapred:hadoop ${HADOOP_LOG_DIR}/mapred
  echo "Configuration setup is completed."
  if [[ "$HADOOP_NN_HOST" =~ "$(hostname)" ]]; then
    echo "Proceed to run hadoop-setup-hdfs.sh on namenode."
  fi
else
  echo
  echo "Configuration file has been generated, please copy:"
  echo
  echo "core-site.xml"
  echo "hdfs-site.xml"
  echo "mapred-site.xml"
  echo "hadoop-env.sh"
  echo
  echo " to ${HADOOP_CONF_DIR} on all nodes, and proceed to run hadoop-setup-hdfs.sh on namenode."
fi