diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 855a77fbd6..43a8e48334 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -325,6 +325,9 @@ Trunk (Unreleased)
     HADOOP-9249. hadoop-maven-plugins version-info goal causes build failure
     when running with Clover. (Chris Nauroth via suresh)
 
+    HADOOP-9264. Port change to use Java untar API on Windows from
+    branch-1-win to trunk. (Chris Nauroth via suresh)
+
   OPTIMIZATIONS
 
     HADOOP-7761. Improve the performance of raw comparisons. (todd)
diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml
index e154d4a820..545c0cb5da 100644
--- a/hadoop-common-project/hadoop-common/pom.xml
+++ b/hadoop-common-project/hadoop-common/pom.xml
@@ -241,6 +241,11 @@
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-compress</artifactId>
+      <version>1.4</version>
+    </dependency>
   </dependencies>
 
   <build>
@@ -381,6 +386,23 @@
               </target>
             </configuration>
           </execution>
+          <execution>
+            <id>copy-test-tarballs</id>
+            <phase>process-test-resources</phase>
+            <goals>
+              <goal>run</goal>
+            </goals>
+            <configuration>
+              <target>
+                <copy toDir="${test.cache.data}">
+                  <fileset dir="${basedir}/src/test/java/org/apache/hadoop/fs">
+                    <include name="test-untar.tar"/>
+                    <include name="test-untar.tgz"/>
+                  </fileset>
+                </copy>
+              </target>
+            </configuration>
+          </execution>
           <execution>
             <id>pre-site</id>
             <phase>pre-site</phase>
@@ -485,6 +507,7 @@
             <exclude>src/test/all-tests</exclude>
             <exclude>src/test/resources/kdc/ldif/users.ldif</exclude>
             <exclude>src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4.c</exclude>
+            <exclude>src/test/java/org/apache/hadoop/fs/test-untar.tgz</exclude>
           </excludes>
         </configuration>
       </plugin>
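The commons-compress dependency added above supplies the stream-oriented tar
reader that the new FileUtil code builds on. As a point of reference, here is
a minimal, self-contained sketch of that API (the class name ListTar and the
use of a command-line argument are illustrative, not part of the patch):

    import java.io.BufferedInputStream;
    import java.io.FileInputStream;
    import java.io.IOException;
    import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
    import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;

    public class ListTar {
      // Prints the name and size of every entry in an uncompressed tar file.
      public static void main(String[] args) throws IOException {
        TarArchiveInputStream tis = new TarArchiveInputStream(
            new BufferedInputStream(new FileInputStream(args[0])));
        try {
          for (TarArchiveEntry entry = tis.getNextTarEntry(); entry != null;
               entry = tis.getNextTarEntry()) {
            System.out.println(entry.getName() + " (" + entry.getSize() + " bytes)");
          }
        } finally {
          tis.close();
        }
      }
    }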
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java
index 4593eedb9f..19c19cd2b6 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java
@@ -21,9 +21,12 @@
import java.io.*;
import java.util.Arrays;
import java.util.Enumeration;
+import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;
 import java.util.zip.ZipFile;
 
+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
@@ -624,14 +627,28 @@ public static void unZip(File inFile, File unzipDir) throws IOException {
* @throws IOException
*/
public static void unTar(File inFile, File untarDir) throws IOException {
- if (!untarDir.mkdirs()) {
+ if (!untarDir.mkdirs()) {
if (!untarDir.isDirectory()) {
throw new IOException("Mkdirs failed to create " + untarDir);
}
}
-    StringBuffer untarCommand = new StringBuffer();
boolean gzipped = inFile.toString().endsWith("gz");
+    if (Shell.WINDOWS) {
+      // Tar is not native to Windows. Use a simple Java-based implementation
+      // for tests and simple tar archives.
+      unTarUsingJava(inFile, untarDir, gzipped);
+    }
+    else {
+      // Spawn the tar utility to untar the archive, for full-fledged Unix
+      // behavior such as resolving symlinks inside tar archives.
+      unTarUsingTar(inFile, untarDir, gzipped);
+    }
+ }
+
+ private static void unTarUsingTar(File inFile, File untarDir,
+ boolean gzipped) throws IOException {
+ StringBuffer untarCommand = new StringBuffer();
if (gzipped) {
untarCommand.append(" gzip -dc '");
untarCommand.append(FileUtil.makeShellPath(inFile));
@@ -656,7 +673,69 @@ public static void unTar(File inFile, File untarDir) throws IOException {
". Tar process exited with exit code " + exitcode);
}
}
+
+ private static void unTarUsingJava(File inFile, File untarDir,
+ boolean gzipped) throws IOException {
+ InputStream inputStream = null;
+ if (gzipped) {
+ inputStream = new BufferedInputStream(new GZIPInputStream(
+ new FileInputStream(inFile)));
+ } else {
+ inputStream = new BufferedInputStream(new FileInputStream(inFile));
+ }
+    TarArchiveInputStream tis = new TarArchiveInputStream(inputStream);
+
+    try {
+      for (TarArchiveEntry entry = tis.getNextTarEntry(); entry != null;
+           entry = tis.getNextTarEntry()) {
+        unpackEntries(tis, entry, untarDir);
+      }
+    } finally {
+      // Closing the archive stream also closes the wrapped input stream.
+      tis.close();
+    }
+  }
+
+ private static void unpackEntries(TarArchiveInputStream tis,
+ TarArchiveEntry entry, File outputDir) throws IOException {
+ if (entry.isDirectory()) {
+ File subDir = new File(outputDir, entry.getName());
+      if (!subDir.mkdirs() && !subDir.isDirectory()) {
+        throw new IOException("Mkdirs failed to create tar internal dir "
+            + subDir);
+      }
+
+ for (TarArchiveEntry e : entry.getDirectoryEntries()) {
+ unpackEntries(tis, e, subDir);
+ }
+
+ return;
+ }
+
+ File outputFile = new File(outputDir, entry.getName());
+ if (!outputDir.exists()) {
+ if (!outputDir.mkdirs()) {
+ throw new IOException("Mkdirs failed to create tar internal dir "
+ + outputDir);
+ }
+ }
+
+    int count;
+    byte[] data = new byte[2048];
+    BufferedOutputStream outputStream = new BufferedOutputStream(
+        new FileOutputStream(outputFile));
+
+    try {
+      while ((count = tis.read(data)) != -1) {
+        outputStream.write(data, 0, count);
+      }
+      outputStream.flush();
+    } finally {
+      outputStream.close();
+    }
+ }
+
/**
* Class for creating hardlinks.
* Supports Unix, Cygwin, WindXP.
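Taken together, callers still see a single FileUtil.unTar(File, File) entry
point; the platform dispatch on Shell.WINDOWS stays internal. A minimal
caller sketch (the paths here are hypothetical):

    import java.io.File;
    import java.io.IOException;
    import org.apache.hadoop.fs.FileUtil;

    public class UntarExample {
      public static void main(String[] args) throws IOException {
        // On Windows this runs the pure-Java commons-compress path; elsewhere
        // it shells out to tar. A file name ending in "gz" (.gz, .tgz)
        // selects gunzip decompression first.
        FileUtil.unTar(new File("/tmp/test-untar.tgz"), new File("/tmp/untarDir"));
      }
    }

Note that only the Unix code path resolves symlinks inside archives; the Java
implementation is intentionally limited to the simple archives used in tests.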
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileUtil.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileUtil.java
index a64b45d80f..e73c644fb0 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileUtil.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileUtil.java
@@ -546,4 +546,44 @@ public void testGetDU() throws IOException {
long expected = 2 * (3 + System.getProperty("line.separator").length());
Assert.assertEquals(expected, du);
}
+
+ private void doUntarAndVerify(File tarFile, File untarDir)
+ throws IOException {
+ if (untarDir.exists() && !FileUtil.fullyDelete(untarDir)) {
+ throw new IOException("Could not delete directory '" + untarDir + "'");
+ }
+ FileUtil.unTar(tarFile, untarDir);
+
+ String parentDir = untarDir.getCanonicalPath() + Path.SEPARATOR + "name";
+ File testFile = new File(parentDir + Path.SEPARATOR + "version");
+ Assert.assertTrue(testFile.exists());
+ Assert.assertTrue(testFile.length() == 0);
+ String imageDir = parentDir + Path.SEPARATOR + "image";
+ testFile = new File(imageDir + Path.SEPARATOR + "fsimage");
+ Assert.assertTrue(testFile.exists());
+ Assert.assertTrue(testFile.length() == 157);
+ String currentDir = parentDir + Path.SEPARATOR + "current";
+ testFile = new File(currentDir + Path.SEPARATOR + "fsimage");
+ Assert.assertTrue(testFile.exists());
+ Assert.assertTrue(testFile.length() == 4331);
+ testFile = new File(currentDir + Path.SEPARATOR + "edits");
+ Assert.assertTrue(testFile.exists());
+ Assert.assertTrue(testFile.length() == 1033);
+ testFile = new File(currentDir + Path.SEPARATOR + "fstime");
+ Assert.assertTrue(testFile.exists());
+ Assert.assertTrue(testFile.length() == 8);
+ }
+
+ @Test
+ public void testUntar() throws IOException {
+ String tarGzFileName = System.getProperty("test.cache.data",
+ "build/test/cache") + "/test-untar.tgz";
+ String tarFileName = System.getProperty("test.cache.data",
+ "build/test/cache") + "/test-untar.tar";
+ String dataDir = System.getProperty("test.build.data", "build/test/data");
+ File untarDir = new File(dataDir, "untarDir");
+
+ doUntarAndVerify(new File(tarGzFileName), untarDir);
+ doUntarAndVerify(new File(tarFileName), untarDir);
+ }
}
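The two binary fixtures below were presumably produced with a standard tar
tool; an equivalent .tgz fixture could also be generated programmatically with
commons-compress. A sketch, assuming hypothetical entry names and sizes that
mirror the test's assertions:

    import java.io.BufferedOutputStream;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.util.zip.GZIPOutputStream;
    import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
    import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;

    public class MakeTestTarball {
      public static void main(String[] args) throws IOException {
        TarArchiveOutputStream tos = new TarArchiveOutputStream(
            new GZIPOutputStream(new BufferedOutputStream(
                new FileOutputStream("test-untar.tgz"))));
        try {
          // An empty "name/version" file, as asserted by doUntarAndVerify.
          TarArchiveEntry version = new TarArchiveEntry("name/version");
          version.setSize(0);
          tos.putArchiveEntry(version);
          tos.closeArchiveEntry();

          // A 4331-byte "name/current/fsimage" payload (zeros for brevity).
          byte[] payload = new byte[4331];
          TarArchiveEntry fsimage = new TarArchiveEntry("name/current/fsimage");
          fsimage.setSize(payload.length);
          tos.putArchiveEntry(fsimage);
          tos.write(payload);
          tos.closeArchiveEntry();
        } finally {
          tos.close();
        }
      }
    }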
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/test-untar.tar b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/test-untar.tar
new file mode 100644
index 0000000000..949e985c73
Binary files /dev/null and b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/test-untar.tar differ
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/test-untar.tgz b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/test-untar.tgz
new file mode 100644
index 0000000000..9e9ef40f6f
Binary files /dev/null and b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/test-untar.tgz differ