HADOOP-7642. create hadoop-dist module where TAR stitching would happen. Contributed by Thomas White.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1182151 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Alejandro Abdelnur 2011-10-12 00:27:32 +00:00
parent ac64df85a9
commit 3f998db918
9 changed files with 329 additions and 152 deletions

View File

@ -0,0 +1,101 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Assembly descriptor "hadoop-mapreduce-dist".
  Packs bin files, configuration, a copy of the sources, the module binaries
  and the project dependencies into a single tar.gz with a base directory.
-->
<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
  <id>hadoop-mapreduce-dist</id>
  <formats>
    <format>tar.gz</format>
  </formats>
  <includeBaseDirectory>true</includeBaseDirectory>
  <!-- TODO: this layout is wrong. We need module specific bin files in module specific dirs -->
  <fileSets>
    <!-- container-executor taken from the nodemanager's target/classes/bin, marked executable -->
    <fileSet>
      <directory>hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/target/classes/bin</directory>
      <outputDirectory>bin</outputDirectory>
      <includes>
        <include>container-executor</include>
      </includes>
      <fileMode>0755</fileMode>
    </fileSet>
    <!-- files directly under hadoop-yarn/bin, marked executable -->
    <fileSet>
      <directory>hadoop-yarn/bin</directory>
      <outputDirectory>bin</outputDirectory>
      <includes>
        <include>*</include>
      </includes>
      <fileMode>0755</fileMode>
    </fileSet>
    <!-- files directly under the top-level bin directory, marked executable -->
    <fileSet>
      <directory>bin</directory>
      <outputDirectory>bin</outputDirectory>
      <includes>
        <include>*</include>
      </includes>
      <fileMode>0755</fileMode>
    </fileSet>
    <!-- the whole hadoop-yarn/conf tree -->
    <fileSet>
      <directory>hadoop-yarn/conf</directory>
      <outputDirectory>conf</outputDirectory>
      <includes>
        <include>**/*</include>
      </includes>
    </fileSet>
    <!-- source tree copy; no <directory> is given for this set -->
    <fileSet>
      <outputDirectory>sources</outputDirectory>
      <excludes>
        <exclude>**/*.jar</exclude>
        <exclude>**/target/**</exclude>
        <!-- scripts to include later for setting fileMode -->
        <exclude>**/bin/*</exclude>
        <exclude>**/scripts/*</exclude>
        <!-- images that we don't need (and cause problems for our tools) -->
        <exclude>**/dt-*/images/**</exclude>
        <!-- until the code that does this is fixed -->
        <exclude>**/file:/**</exclude>
        <exclude>**/SecurityAuth.audit*</exclude>
      </excludes>
      <includes>
        <include>assembly/**</include>
        <include>pom.xml</include>
        <include>build*.xml</include>
        <include>ivy.xml</include>
        <include>ivy/**</include>
        <include>INSTALL</include>
        <include>LICENSE.txt</include>
        <include>mr-client/**</include>
        <include>hadoop-yarn/**</include>
        <include>src/**</include>
      </includes>
    </fileSet>
    <!-- the bin/ and scripts/ files excluded from the set above, added here with mode 0755 -->
    <fileSet>
      <outputDirectory>sources</outputDirectory>
      <includes>
        <include>**/bin/*</include>
        <include>**/scripts/*</include>
      </includes>
      <fileMode>0755</fileMode>
    </fileSet>
  </fileSets>
  <moduleSets>
    <!-- module binaries go to modules/ as packed archives, without their dependencies -->
    <moduleSet>
      <excludes>
        <exclude>org.apache.hadoop:hadoop-yarn-server-tests</exclude>
      </excludes>
      <binaries>
        <outputDirectory>modules</outputDirectory>
        <includeDependencies>false</includeDependencies>
        <unpack>false</unpack>
      </binaries>
    </moduleSet>
  </moduleSets>
  <dependencySets>
    <dependencySet>
      <useProjectArtifact>false</useProjectArtifact>
      <!-- relative to the assembly root, like every other outputDirectory in this descriptor -->
      <outputDirectory>lib</outputDirectory>
      <!-- Exclude hadoop artifacts. They will be found via HADOOP* env -->
      <excludes>
        <exclude>org.apache.hadoop:hadoop-common</exclude>
        <exclude>org.apache.hadoop:hadoop-hdfs</exclude>
      </excludes>
    </dependencySet>
  </dependencySets>
</assembly>

View File

@ -445,6 +445,9 @@ Release 0.23.0 - Unreleased
HADOOP-7627. Improve MetricsAsserts to give more understandable output
on failure. (todd)
HADOOP-7642. create hadoop-dist module where TAR stitching would happen.
(Thomas White via tucu)
OPTIMIZATIONS
HADOOP-7333. Performance improvement in PureJavaCrc32. (Eric Caspole

144
hadoop-dist/pom.xml Normal file
View File

@ -0,0 +1,144 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<project>
  <modelVersion>4.0.0</modelVersion>
  <parent>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-project</artifactId>
    <version>0.24.0-SNAPSHOT</version>
    <relativePath>../hadoop-project</relativePath>
  </parent>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-dist</artifactId>
  <version>0.24.0-SNAPSHOT</version>
  <description>Apache Hadoop Distribution</description>
  <name>Apache Hadoop Distribution</name>
  <packaging>jar</packaging>

  <!-- Using dependencies to ensure this module is the last one -->
  <dependencies>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapreduce-client-app</artifactId>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-yarn-api</artifactId>
      <scope>provided</scope>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- this module's artifact is not deployed -->
      <plugin>
        <artifactId>maven-deploy-plugin</artifactId>
        <configuration>
          <skip>true</skip>
        </configuration>
      </plugin>
      <!-- the license check only looks at pom.xml in this module -->
      <plugin>
        <groupId>org.apache.rat</groupId>
        <artifactId>apache-rat-plugin</artifactId>
        <configuration>
          <includes>
            <include>pom.xml</include>
          </includes>
        </configuration>
      </plugin>
    </plugins>
  </build>

  <profiles>
    <profile>
      <id>dist</id>
      <activation>
        <activeByDefault>false</activeByDefault>
      </activation>
      <build>
        <plugins>
          <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-antrun-plugin</artifactId>
            <executions>
              <execution>
                <id>dist</id>
                <phase>package</phase>
                <goals>
                  <goal>run</goal>
                </goals>
                <configuration>
                  <!--
                    Writes dist-tar-stitching.sh into the build directory and runs it with sh.
                    The script unpacks the common, hdfs and mapreduce tarballs into one
                    hadoop-VERSION directory, symlinks the MapReduce/YARN jars from modules/
                    into share/hadoop/common/lib and tars the result again.
                    The target only runs when the "tar" property is set.

                    run() stores $? immediately after the command: the echo calls that
                    follow reset $? to 0, so exiting with a later $? would report success
                    and hide the failure from failonerror.
                    Paths built from the checkout location are quoted so that a directory
                    name containing spaces is passed as one argument.
                  -->
                  <target if="tar">
                    <echo file="${project.build.directory}/dist-tar-stitching.sh">
                      run() {
                        echo "\$ ${@}"
                        "${@}"
                        res=$?
                        if [ $res != 0 ]; then
                          echo
                          echo "Failed!"
                          echo
                          exit $res
                        fi
                      }

                      ROOT=`cd "${basedir}/..";pwd`
                      echo
                      echo "Current directory `pwd`"
                      echo
                      run rm -rf hadoop-${project.version}
                      run mkdir hadoop-${project.version}
                      run cd hadoop-${project.version}
                      run tar xzf "$ROOT/hadoop-common-project/hadoop-common/target/hadoop-common-${project.version}.tar.gz" --strip-components 1
                      run tar xzf "$ROOT/hadoop-hdfs-project/hadoop-hdfs/target/hadoop-hdfs-${project.version}.tar.gz" --strip-components 1
                      run tar xzf "$ROOT/hadoop-mapreduce-project/target/hadoop-mapreduce-${project.version}.tar.gz" --strip-components 1
                      COMMON_LIB=share/hadoop/common/lib
                      MODULES=../../../../modules
                      run ln -s $MODULES/hadoop-mapreduce-client-app-${project.version}.jar $COMMON_LIB
                      run ln -s $MODULES/hadoop-yarn-api-${project.version}.jar $COMMON_LIB
                      run ln -s $MODULES/hadoop-mapreduce-client-common-${project.version}.jar $COMMON_LIB
                      run ln -s $MODULES/hadoop-yarn-common-${project.version}.jar $COMMON_LIB
                      run ln -s $MODULES/hadoop-mapreduce-client-core-${project.version}.jar $COMMON_LIB
                      run ln -s $MODULES/hadoop-yarn-server-common-${project.version}.jar $COMMON_LIB
                      run ln -s $MODULES/hadoop-mapreduce-client-jobclient-${project.version}.jar $COMMON_LIB
                      run cd -
                      run tar czf hadoop-${project.version}.tar.gz hadoop-${project.version}
                      echo
                      echo "Hadoop dist tar available at: ${project.build.directory}/hadoop-${project.version}.tar.gz"
                      echo
                    </echo>
                    <exec executable="sh" dir="${project.build.directory}" failonerror="true">
                      <arg line="./dist-tar-stitching.sh"/>
                    </exec>
                  </target>
                </configuration>
              </execution>
            </executions>
          </plugin>
        </plugins>
      </build>
    </profile>
  </profiles>
</project>

View File

@ -2,49 +2,31 @@ To compile Hadoop Mapreduce next following, do the following:
Step 1) Install dependencies for yarn
See http://svn.apache.org/repos/asf/hadoop/common/trunk/hadoop-mapreduce/hadoop-yarn/README
See http://svn.apache.org/repos/asf/hadoop/common/trunk/hadoop-mapreduce-project/hadoop-yarn/README
Make sure protobuf library is in your library path or set: export LD_LIBRARY_PATH=/usr/local/lib
Step 2) Checkout
svn checkout http://svn.apache.org/repos/asf/hadoop/common/trunk
Step 3) Build common
Step 3) Build
Go to common directory - choose your regular common build command
Example: mvn clean install package -Pbintar -DskipTests
Go to common directory - choose your regular common build command. For example:
Step 4) Build HDFS
Go to hdfs directory
ant veryclean mvn-install -Dresolvers=internal
Step 5) Build yarn and mapreduce
Go to mapreduce directory
export MAVEN_OPTS=-Xmx512m
mvn clean package -Pdist -Dtar -DskipTests -Pnative
mvn clean install assembly:assembly -DskipTests
You can omit -Pnative if you don't want to build native packages.
Copy in build.properties if appropriate - make sure eclipse.home not set
ant veryclean tar -Dresolvers=internal
Step 4) Untar the tarball from hadoop-dist/target/ into a clean and different
directory, say YARN_HOME.
You will see a tarball in
ls target/hadoop-mapreduce-0.24.0-SNAPSHOT-all.tar.gz
Step 6) Untar the tarball in a clean and different directory.
say YARN_HOME.
Make sure you aren't picking up avro-1.3.2.jar, remove:
$HADOOP_COMMON_HOME/share/hadoop/common/lib/avro-1.3.2.jar
$YARN_HOME/lib/avro-1.3.2.jar
Step 7)
Install hdfs/common and start hdfs
Step 5)
Start hdfs
To run Hadoop Mapreduce next applications:
Step 8) export the following variables to where you have things installed:
Step 6) export the following variables to where you have things installed:
You probably want to export these in hadoop-env.sh and yarn-env.sh also.
export HADOOP_MAPRED_HOME=<mapred loc>
@ -54,7 +36,7 @@ export YARN_HOME=directory where you untarred yarn
export HADOOP_CONF_DIR=<conf loc>
export YARN_CONF_DIR=$HADOOP_CONF_DIR
Step 9) Setup config: for running mapreduce applications, which now are in user land, you need to setup nodemanager with the following configuration in your yarn-site.xml before you start the nodemanager.
Step 7) Setup config: for running mapreduce applications, which now are in user land, you need to setup nodemanager with the following configuration in your yarn-site.xml before you start the nodemanager.
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce.shuffle</value>
@ -65,31 +47,21 @@ Step 9) Setup config: for running mapreduce applications, which now are in user
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
Step 10) Modify mapred-site.xml to use yarn framework
Step 8) Modify mapred-site.xml to use yarn framework
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
Step 11) Create the following symlinks in $HADOOP_COMMON_HOME/share/hadoop/common/lib
Step 9) cd $YARN_HOME
ln -s $YARN_HOME/modules/hadoop-mapreduce-client-app-0.24.0-SNAPSHOT.jar .
ln -s $YARN_HOME/modules/hadoop-yarn-api-0.24.0-SNAPSHOT.jar .
ln -s $YARN_HOME/modules/hadoop-mapreduce-client-common-0.24.0-SNAPSHOT.jar .
ln -s $YARN_HOME/modules/hadoop-yarn-common-0.24.0-SNAPSHOT.jar .
ln -s $YARN_HOME/modules/hadoop-mapreduce-client-core-0.24.0-SNAPSHOT.jar .
ln -s $YARN_HOME/modules/hadoop-yarn-server-common-0.24.0-SNAPSHOT.jar .
ln -s $YARN_HOME/modules/hadoop-mapreduce-client-jobclient-0.24.0-SNAPSHOT.jar .
Step 10) bin/yarn-daemon.sh start resourcemanager
Step 12) cd $YARN_HOME
Step 11) bin/yarn-daemon.sh start nodemanager
Step 13) bin/yarn-daemon.sh start resourcemanager
Step 12) bin/yarn-daemon.sh start historyserver
Step 14) bin/yarn-daemon.sh start nodemanager
Step 15) bin/yarn-daemon.sh start historyserver
Step 16) You are all set, an example on how to run a mapreduce job is:
Step 13) You are all set, an example on how to run a mapreduce job is:
cd $HADOOP_MAPRED_HOME
ant examples -Dresolvers=internal
$HADOOP_COMMON_HOME/bin/hadoop jar $HADOOP_MAPRED_HOME/build/hadoop-mapreduce-examples-0.24.0-SNAPSHOT.jar randomwriter -Dmapreduce.job.user.name=$USER -Dmapreduce.clientfactory.class.name=org.apache.hadoop.mapred.YarnClientFactory -Dmapreduce.randomwriter.bytespermap=10000 -Ddfs.blocksize=536870912 -Ddfs.block.size=536870912 -libjars $YARN_HOME/modules/hadoop-mapreduce-client-jobclient-0.24.0-SNAPSHOT.jar output

View File

@ -1,101 +0,0 @@
<!--
  Assembly descriptor "all": one tar.gz, with a base directory, holding bin
  files, configuration, a copy of the sources, module binaries and dependencies.
-->
<assembly
    xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">

  <id>all</id>
  <formats>
    <format>tar.gz</format>
  </formats>
  <includeBaseDirectory>true</includeBaseDirectory>

  <!-- TODO: the layout below is not right yet; bin files that belong to a module should live in that module's own directory -->
  <fileSets>

    <!-- container-executor from the nodemanager's src/main/c tree, executable -->
    <fileSet>
      <directory>hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/c/container-executor</directory>
      <outputDirectory>bin</outputDirectory>
      <fileMode>0755</fileMode>
      <includes>
        <include>container-executor</include>
      </includes>
    </fileSet>

    <!-- hadoop-yarn/bin, one level deep, executable -->
    <fileSet>
      <directory>hadoop-yarn/bin</directory>
      <outputDirectory>bin</outputDirectory>
      <fileMode>0755</fileMode>
      <includes>
        <include>*</include>
      </includes>
    </fileSet>

    <!-- top-level bin, one level deep, executable -->
    <fileSet>
      <directory>bin</directory>
      <outputDirectory>bin</outputDirectory>
      <fileMode>0755</fileMode>
      <includes>
        <include>*</include>
      </includes>
    </fileSet>

    <!-- everything below hadoop-yarn/conf -->
    <fileSet>
      <directory>hadoop-yarn/conf</directory>
      <outputDirectory>conf</outputDirectory>
      <includes>
        <include>**/*</include>
      </includes>
    </fileSet>

    <!-- sources: build files, docs and code trees, minus the patterns excluded below -->
    <fileSet>
      <outputDirectory>sources</outputDirectory>
      <includes>
        <include>assembly/**</include>
        <include>pom.xml</include>
        <include>build*.xml</include>
        <include>ivy.xml</include>
        <include>ivy/**</include>
        <include>INSTALL</include>
        <include>LICENSE.txt</include>
        <include>mr-client/**</include>
        <include>hadoop-yarn/**</include>
        <include>src/**</include>
      </includes>
      <excludes>
        <exclude>**/*.jar</exclude>
        <exclude>**/target/**</exclude>
        <!-- left out here; the next fileSet adds them with an explicit fileMode -->
        <exclude>**/bin/*</exclude>
        <exclude>**/scripts/*</exclude>
        <!-- unneeded images, which also cause trouble for our tools -->
        <exclude>**/dt-*/images/**</exclude>
        <!-- kept out until the code producing these is fixed -->
        <exclude>**/file:/**</exclude>
        <exclude>**/SecurityAuth.audit*</exclude>
      </excludes>
    </fileSet>

    <!-- sources: the bin/ and scripts/ entries, with mode 0755 -->
    <fileSet>
      <outputDirectory>sources</outputDirectory>
      <fileMode>0755</fileMode>
      <includes>
        <include>**/bin/*</include>
        <include>**/scripts/*</include>
      </includes>
    </fileSet>

  </fileSets>

  <moduleSets>
    <!-- packed module binaries under modules/, dependencies not pulled in -->
    <moduleSet>
      <excludes>
        <exclude>org.apache.hadoop:hadoop-yarn-server-tests</exclude>
      </excludes>
      <binaries>
        <outputDirectory>modules</outputDirectory>
        <unpack>false</unpack>
        <includeDependencies>false</includeDependencies>
      </binaries>
    </moduleSet>
  </moduleSets>

  <dependencySets>
    <dependencySet>
      <outputDirectory>/lib</outputDirectory>
      <useProjectArtifact>false</useProjectArtifact>
      <!-- hadoop-common and hadoop-hdfs are left out; they are located through the HADOOP* environment -->
      <excludes>
        <exclude>org.apache.hadoop:hadoop-common</exclude>
        <exclude>org.apache.hadoop:hadoop-hdfs</exclude>
      </excludes>
    </dependencySet>
  </dependencySets>

</assembly>

View File

@ -25,6 +25,7 @@
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<test.logs>true</test.logs>
<test.timeout>600000</test.timeout>
<hadoop.assemblies.version>${project.version}</hadoop.assemblies.version>
<hadoop-common.version>0.24.0-SNAPSHOT</hadoop-common.version>
<hadoop-hdfs.version>0.24.0-SNAPSHOT</hadoop-hdfs.version>
<yarn.version>0.24.0-SNAPSHOT</yarn.version>
@ -317,13 +318,33 @@
</pluginManagement>
<plugins>
<!-- Assembly plugin: the "tar" execution runs the "single" goal in the package phase.
     The hadoop-assemblies artifact is added to the plugin's own dependencies at
     the version given by hadoop.assemblies.version.
     NOTE(review): the configuration names both a descriptor file (assembly/all.xml)
     and a descriptorRef (hadoop-mapreduce-dist); confirm which one is meant to apply. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-assemblies</artifactId>
<version>${hadoop.assemblies.version}</version>
</dependency>
</dependencies>
<configuration>
<tarLongFileMode>gnu</tarLongFileMode>
<descriptors>
<descriptor>assembly/all.xml</descriptor>
</descriptors>
<!-- output is named artifactId-version, with no assembly id suffix, and is not attached to the project -->
<appendAssemblyId>false</appendAssemblyId>
<attach>false</attach>
<finalName>${project.artifactId}-${project.version}</finalName>
<descriptorRefs>
<descriptorRef>hadoop-mapreduce-dist</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>tar</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-antrun-plugin</artifactId>

View File

@ -26,6 +26,7 @@
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<test.logs>true</test.logs>
<test.timeout>600000</test.timeout>
<hadoop.assemblies.version>${project.version}</hadoop.assemblies.version>
<hadoop-common.version>0.24.0-SNAPSHOT</hadoop-common.version>
<hadoop-hdfs.version>0.24.0-SNAPSHOT</hadoop-hdfs.version>
<hadoop-mapreduce.version>0.24.0-SNAPSHOT</hadoop-mapreduce.version>
@ -320,13 +321,33 @@
</executions>
</plugin>
<!-- Assembly plugin: the "tar" execution runs the "single" goal in the package phase.
     The hadoop-assemblies artifact is added to the plugin's own dependencies at
     the version given by hadoop.assemblies.version.
     NOTE(review): the configuration names both a descriptor file (assembly/all.xml)
     and a descriptorRef (hadoop-mapreduce-dist); confirm which one is meant to apply. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-assemblies</artifactId>
<version>${hadoop.assemblies.version}</version>
</dependency>
</dependencies>
<configuration>
<tarLongFileMode>gnu</tarLongFileMode>
<descriptors>
<descriptor>assembly/all.xml</descriptor>
</descriptors>
<!-- output is named artifactId-version, with no assembly id suffix, and is not attached to the project -->
<appendAssemblyId>false</appendAssemblyId>
<attach>false</attach>
<finalName>${project.artifactId}-${project.version}</finalName>
<descriptorRefs>
<descriptorRef>hadoop-mapreduce-dist</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>tar</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>com.atlassian.maven.plugins</groupId>

View File

@ -114,6 +114,21 @@
<artifactId>hadoop-auth</artifactId>
<version>${project.version}</version>
</dependency>
<!-- hadoop-hdfs, hadoop-mapreduce-client-app and hadoop-yarn-api, each at this project's own version.
     NOTE(review): presumably part of a dependencyManagement section; the enclosing element is not visible here, confirm. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-app</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-api</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>

View File

@ -41,6 +41,7 @@
<module>hadoop-hdfs-project</module>
<module>hadoop-mapreduce-project</module>
<module>hadoop-tools</module>
<module>hadoop-dist</module>
</modules>
<build>