From 65681d5716e46c6259e79aeca10f4a4243285029 Mon Sep 17 00:00:00 2001 From: Jitendra Nath Pandey Date: Tue, 29 Nov 2011 23:43:57 +0000 Subject: [PATCH] HADOOP-7804. Enable hadoop config generator to set configurations to enable short circuit read. Contributed by Arpit Gupta. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1208144 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 3 + .../src/main/packages/hadoop-setup-conf.sh | 58 ++++++++++++++++++- .../packages/templates/conf/hdfs-site.xml | 2 +- 3 files changed, 60 insertions(+), 3 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 8ddfb2c4bb..382c086a30 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -126,6 +126,9 @@ Release 0.23.1 - Unreleased HADOOP-7424. Log an error if the topology script doesn't handle multiple args. (Uma Maheswara Rao G via eli) + HADOOP-7804. Enable hadoop config generator to set configurations to enable + short circuit read. (Arpit Gupta via jitendra) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-common-project/hadoop-common/src/main/packages/hadoop-setup-conf.sh b/hadoop-common-project/hadoop-common/src/main/packages/hadoop-setup-conf.sh index 0daac37942..1dbe87359f 100644 --- a/hadoop-common-project/hadoop-common/src/main/packages/hadoop-setup-conf.sh +++ b/hadoop-common-project/hadoop-common/src/main/packages/hadoop-setup-conf.sh @@ -67,6 +67,10 @@ usage: $0 This value should be <= mapred.cluster.max.map.memory.mb --mapreduce-reduce-memory-mb=memory Virtual memory, of a single reduce slot for a job. Defaults to -1 This value should be <= mapred.cluster.max.reduce.memory.mb + --dfs-datanode-dir-perm=700 Set the permission for the datanode data directories. Defaults to 700 + --dfs-block-local-path-access-user=user User for which you want to enable shortcircuit read. 
+ --dfs-client-read-shortcircuit=true/false Enable shortcircuit read for the client. Defaults to false. + --dfs-client-read-shortcircuit-skip-checksum=false/true Disable checking of checksum when shortcircuit read is taking place. Defaults to false. " exit 1 } @@ -124,7 +128,7 @@ function addPropertyToXMLConf local finalVal=$5 #create the property text, make sure the / are escaped - propText="\n$property<\/name>\n$propValue<\/value>" + propText="\n$property<\/name>\n$propValue<\/value>\n" #if description is not empty add it if [ ! -z $desc ] then @@ -146,6 +150,28 @@ function addPropertyToXMLConf sed -i "s|$endText|$propText$endText|" $file } +########################################## +# Function to setup up the short circuit read settings +######################################### +function setupShortCircuitRead +{ + local conf_file="${HADOOP_CONF_DIR}/hdfs-site.xml" + #if the shortcircuit user is not set then return + if [ -z $DFS_BLOCK_LOCAL_PATH_ACCESS_USER ] + then + return + fi + + #set the defaults if values not present + DFS_CLIENT_READ_SHORTCIRCUIT=${DFS_CLIENT_READ_SHORTCIRCUIT:-false} + DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM=${DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM:-false} + + #add the user to the conf file + addPropertyToXMLConf "$conf_file" "dfs.block.local-path-access.user" "$DFS_BLOCK_LOCAL_PATH_ACCESS_USER" + addPropertyToXMLConf "$conf_file" "dfs.client.read.shortcircuit" "$DFS_CLIENT_READ_SHORTCIRCUIT" + addPropertyToXMLConf "$conf_file" "dfs.client.read.shortcircuit.skip.checksum" "$DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM" +} + ########################################## # Function to setup up the proxy user settings ######################################### @@ -217,6 +243,10 @@ OPTS=$(getopt \ -l 'mapreduce-jobtracker-maxreducememory-mb:' \ -l 'mapreduce-map-memory-mb:' \ -l 'mapreduce-reduce-memory-mb:' \ + -l 'dfs-datanode-dir-perm:' \ + -l 'dfs-block-local-path-access-user:' \ + -l 
'dfs-client-read-shortcircuit:' \ + -l 'dfs-client-read-shortcircuit-skip-checksum:' \ -o 'h' \ -- "$@") @@ -376,6 +406,22 @@ while true ; do MAPREDUCE_REDUCE_MEMORY_MB=$2; shift 2 AUTOMATED=1 ;; + --dfs-datanode-dir-perm) + DFS_DATANODE_DIR_PERM=$2; shift 2 + AUTOMATED=1 + ;; + --dfs-block-local-path-access-user) + DFS_BLOCK_LOCAL_PATH_ACCESS_USER=$2; shift 2 + AUTOMATED=1 + ;; + --dfs-client-read-shortcircuit) + DFS_CLIENT_READ_SHORTCIRCUIT=$2; shift 2 + AUTOMATED=1 + ;; + --dfs-client-read-shortcircuit-skip-checksum) + DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM=$2; shift 2 + AUTOMATED=1 + ;; --) shift ; break ;; @@ -421,6 +467,8 @@ DFS_SUPPORT_APPEND=${DFS_SUPPORT_APPEND:-false} KERBEROS_REALM=${KERBEROS_REALM:-KERBEROS.EXAMPLE.COM} SECURITY_TYPE=${SECURITY_TYPE:-simple} KINIT=${KINIT:-/usr/kerberos/bin/kinit} +#default the data dir perm to 700 +DFS_DATANODE_DIR_PERM=${DFS_DATANODE_DIR_PERM:-700} if [ "${SECURITY_TYPE}" = "kerberos" ]; then TASK_CONTROLLER="org.apache.hadoop.mapred.LinuxTaskController" HADOOP_DN_ADDR="0.0.0.0:1019" @@ -561,7 +609,10 @@ if [ "${AUTOSETUP}" == "1" -o "${AUTOSETUP}" == "y" ]; then #setup up the proxy users setupProxyUsers - + + #setup short circuit read + setupShortCircuitRead + #set the owner of the hadoop dir to root chown root ${HADOOP_PREFIX} chown root:${HADOOP_GROUP} ${HADOOP_CONF_DIR}/hadoop-env.sh @@ -611,6 +662,9 @@ else #setup up the proxy users setupProxyUsers + #setup short circuit read + setupShortCircuitRead + chown root:${HADOOP_GROUP} ${HADOOP_CONF_DIR}/hadoop-env.sh chmod 755 ${HADOOP_CONF_DIR}/hadoop-env.sh #set taskcontroller diff --git a/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hdfs-site.xml b/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hdfs-site.xml index e1bb595874..64e06a5414 100644 --- a/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hdfs-site.xml +++ b/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hdfs-site.xml 
@@ -202,7 +202,7 @@ dfs.datanode.data.dir.perm - 700 + ${DFS_DATANODE_DIR_PERM} The permissions that should be there on dfs.data.dir directories. The datanode will not come up if the permissions are different on existing dfs.data.dir directories. If the directories