HADOOP-7804. Enable hadoop config generator to set configurations to enable short circuit read. Contributed by Arpit Gupta.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1208144 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jitendra Nath Pandey 2011-11-29 23:43:57 +00:00
parent 0398a9e88d
commit 65681d5716
3 changed files with 60 additions and 3 deletions

View File

@ -126,6 +126,9 @@ Release 0.23.1 - Unreleased
HADOOP-7424. Log an error if the topology script doesn't handle multiple args.
(Uma Maheswara Rao G via eli)
HADOOP-7804. Enable hadoop config generator to set configurations to enable
short circuit read. (Arpit Gupta via jitendra)
OPTIMIZATIONS
BUG FIXES

View File

@ -67,6 +67,10 @@ usage: $0 <parameters>
This value should be <= mapred.cluster.max.map.memory.mb
--mapreduce-reduce-memory-mb=memory Virtual memory, of a single reduce slot for a job. Defaults to -1
This value should be <= mapred.cluster.max.reduce.memory.mb
--dfs-datanode-dir-perm=700 Set the permission for the datanode data directories. Defaults to 700
--dfs-block-local-path-access-user=user User for which you want to enable shortcircuit read.
--dfs-client-read-shortcircuit=true/false Enable shortcircuit read for the client. Will default to true if the shortcircuit user is set.
--dfs-client-read-shortcircuit-skip-checksum=false/true Disable checking of checksum when shortcircuit read is taking place. Defaults to false.
"
exit 1
}
@ -124,7 +128,7 @@ function addPropertyToXMLConf
local finalVal=$5
#create the property text, make sure the / are escaped
propText="<property>\n<name>$property<\/name>\n<value>$propValue<\/value>"
propText="<property>\n<name>$property<\/name>\n<value>$propValue<\/value>\n"
#if description is not empty add it
if [ ! -z $desc ]
then
@ -146,6 +150,28 @@ function addPropertyToXMLConf
sed -i "s|$endText|$propText$endText|" $file
}
##########################################
# Function to set up the short circuit read settings in hdfs-site.xml
#
# Globals:
#   HADOOP_CONF_DIR                             (read) directory holding hdfs-site.xml
#   DFS_BLOCK_LOCAL_PATH_ACCESS_USER            (read) user to enable short circuit
#                                               read for; nothing is written when it
#                                               is empty or unset
#   DFS_CLIENT_READ_SHORTCIRCUIT                (read/written) defaults to true once
#                                               the short circuit user is set, as
#                                               stated in the usage text
#   DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM  (read/written) defaults to false
# Arguments: none
# Returns:   0 when the user is not set, otherwise the status of the last
#            addPropertyToXMLConf call
#########################################
function setupShortCircuitRead
{
  local conf_file="${HADOOP_CONF_DIR}/hdfs-site.xml"

  #if the shortcircuit user is not set then return
  #the expansion is quoted so that an empty value, or one containing
  #whitespace, is always tested as a single word
  if [ -z "${DFS_BLOCK_LOCAL_PATH_ACCESS_USER:-}" ]
  then
    return 0
  fi

  #set the defaults if values not present
  #the usage text promises that short circuit read defaults to true when the
  #short circuit user is set, so honour that here
  DFS_CLIENT_READ_SHORTCIRCUIT=${DFS_CLIENT_READ_SHORTCIRCUIT:-true}
  DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM=${DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM:-false}

  #add the user and the client side settings to the conf file
  addPropertyToXMLConf "$conf_file" "dfs.block.local-path-access.user" "$DFS_BLOCK_LOCAL_PATH_ACCESS_USER"
  addPropertyToXMLConf "$conf_file" "dfs.client.read.shortcircuit" "$DFS_CLIENT_READ_SHORTCIRCUIT"
  addPropertyToXMLConf "$conf_file" "dfs.client.read.shortcircuit.skip.checksum" "$DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM"
}
##########################################
# Function to set up the proxy user settings
#########################################
@ -217,6 +243,10 @@ OPTS=$(getopt \
-l 'mapreduce-jobtracker-maxreducememory-mb:' \
-l 'mapreduce-map-memory-mb:' \
-l 'mapreduce-reduce-memory-mb:' \
-l 'dfs-datanode-dir-perm:' \
-l 'dfs-block-local-path-access-user:' \
-l 'dfs-client-read-shortcircuit:' \
-l 'dfs-client-read-shortcircuit-skip-checksum:' \
-o 'h' \
-- "$@")
@ -376,6 +406,22 @@ while true ; do
MAPREDUCE_REDUCE_MEMORY_MB=$2; shift 2
AUTOMATED=1
;;
--dfs-datanode-dir-perm)
DFS_DATANODE_DIR_PERM=$2; shift 2
AUTOMATED=1
;;
--dfs-block-local-path-access-user)
DFS_BLOCK_LOCAL_PATH_ACCESS_USER=$2; shift 2
AUTOMATED=1
;;
--dfs-client-read-shortcircuit)
DFS_CLIENT_READ_SHORTCIRCUIT=$2; shift 2
AUTOMATED=1
;;
--dfs-client-read-shortcircuit-skip-checksum)
DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM=$2; shift 2
AUTOMATED=1
;;
--)
shift ; break
;;
@ -421,6 +467,8 @@ DFS_SUPPORT_APPEND=${DFS_SUPPORT_APPEND:-false}
KERBEROS_REALM=${KERBEROS_REALM:-KERBEROS.EXAMPLE.COM}
SECURITY_TYPE=${SECURITY_TYPE:-simple}
KINIT=${KINIT:-/usr/kerberos/bin/kinit}
#default the data dir perm to 700
DFS_DATANODE_DIR_PERM=${DFS_DATANODE_DIR_PERM:-700}
if [ "${SECURITY_TYPE}" = "kerberos" ]; then
TASK_CONTROLLER="org.apache.hadoop.mapred.LinuxTaskController"
HADOOP_DN_ADDR="0.0.0.0:1019"
@ -561,7 +609,10 @@ if [ "${AUTOSETUP}" == "1" -o "${AUTOSETUP}" == "y" ]; then
#set up the proxy users
setupProxyUsers
#setup short circuit read
setupShortCircuitRead
#set the owner of the hadoop dir to root
chown root ${HADOOP_PREFIX}
chown root:${HADOOP_GROUP} ${HADOOP_CONF_DIR}/hadoop-env.sh
@ -611,6 +662,9 @@ else
#set up the proxy users
setupProxyUsers
#setup short circuit read
setupShortCircuitRead
chown root:${HADOOP_GROUP} ${HADOOP_CONF_DIR}/hadoop-env.sh
chmod 755 ${HADOOP_CONF_DIR}/hadoop-env.sh
#set taskcontroller

View File

@ -202,7 +202,7 @@
<property>
<name>dfs.datanode.data.dir.perm</name>
<value>700</value>
<value>${DFS_DATANODE_DIR_PERM}</value>
<description>The permissions that should be there on dfs.data.dir
directories. The datanode will not come up if the permissions are
different on existing dfs.data.dir directories. If the directories