Steve Loughran 7328c34ba5
MAPREDUCE-7341. Add an intermediate manifest committer for Azure and GCS
This is a mapreduce/spark output committer optimized for
performance and correctness on Azure ADLS Gen 2 storage
(via the abfs connector) and Google Cloud Storage
(via the external gcs connector library).

* It is safe to use with HDFS, however it has not been optimized
for that use.
* It is *not* safe for use with S3, and will fail if an attempt
is made to do so.

Contributed by Steve Loughran

Change-Id: I6f3502e79c578b9fd1a8c1485f826784b5421fca
2022-03-17 11:24:13 +00:00

177 lines
6.0 KiB
XML

<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
https://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>hadoop-mapreduce-client</artifactId>
<groupId>org.apache.hadoop</groupId>
<version>3.4.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>3.4.0-SNAPSHOT</version>
<name>Apache Hadoop MapReduce Core</name>
<properties>
<!-- Needed for generating FindBugs warnings using parent pom -->
<mr.basedir>${project.parent.basedir}/..</mr.basedir>
<should.run.jdiff>true</should.run.jdiff>
<dev-support.relative.dir>../../dev-support</dev-support.relative.dir>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-client</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-common</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs-client</artifactId>
<exclusions>
<exclusion>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.skyscreamer</groupId>
<artifactId>jsonassert</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>test-jar</goal>
</goals>
<configuration>
<excludes>
<exclude>**/mapred-queues.xml</exclude>
</excludes>
</configuration>
<phase>test-compile</phase>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<configuration>
<sourcepath>target/generated-sources/avro/</sourcepath>
<excludePackageNames>org.apache.hadoop.mapreduce.jobhistory</excludePackageNames>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<systemPropertyVariables>
<log4j.configuration>file:///${project.basedir}/src/test/resources/log4j.properties</log4j.configuration>
<!-- project directory (target/) on a maven build. -->
<project.build.directory>${project.build.directory}</project.build.directory>
<!-- this property is used in hadoop-azure with separate paths for parallel runs -->
<test.build.dir>${project.build.directory}</test.build.dir>
</systemPropertyVariables>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.avro</groupId>
<artifactId>avro-maven-plugin</artifactId>
<executions>
<execution>
<phase>generate-sources</phase>
<goals>
<goal>protocol</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<executions>
<execution>
<phase>pre-site</phase>
<goals>
<goal>run</goal>
</goals>
<configuration>
<tasks>
<copy file="src/main/resources/mapred-default.xml" todir="src/site/resources"/>
<copy file="../../../hadoop-common-project/hadoop-common/src/main/xsl/configuration.xsl" todir="src/site/resources"/>
</tasks>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
<configuration>
<excludes>
<exclude>.gitattributes</exclude>
<exclude>src/test/resources/recordSpanningMultipleSplits.txt</exclude>
<exclude>src/test/resources/testBOM.txt</exclude>
<exclude>src/test/resources/TestSafeguardSplittingUnsplittableFiles.txt.gz</exclude>
</excludes>
</configuration>
</plugin>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<configuration>
<filesets>
<fileset>
<directory>src/site/resources</directory>
<includes>
<include>configuration.xsl</include>
<include>mapred-default.xml</include>
</includes>
<followSymlinks>false</followSymlinks>
</fileset>
</filesets>
</configuration>
</plugin>
</plugins>
</build>
</project>