diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 855a77fbd6..43a8e48334 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -325,6 +325,9 @@ Trunk (Unreleased)
     HADOOP-9249. hadoop-maven-plugins version-info goal causes build failure
     when running with Clover. (Chris Nauroth via suresh)
 
+    HADOOP-9264. Port change to use Java untar API on Windows from
+    branch-1-win to trunk. (Chris Nauroth via suresh)
+
   OPTIMIZATIONS
 
     HADOOP-7761. Improve the performance of raw comparisons. (todd)
diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml
index e154d4a820..545c0cb5da 100644
--- a/hadoop-common-project/hadoop-common/pom.xml
+++ b/hadoop-common-project/hadoop-common/pom.xml
@@ -241,6 +241,11 @@
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-compress</artifactId>
+      <version>1.4</version>
+    </dependency>
   </dependencies>
 
   <build>
@@ -381,6 +386,23 @@
               </target>
             </configuration>
           </execution>
+          <execution>
+            <id>copy-test-tarballs</id>
+            <phase>process-test-resources</phase>
+            <goals>
+              <goal>run</goal>
+            </goals>
+            <configuration>
+              <target>
+                <copy toDir="${test.cache.data}">
+                  <fileset dir="${basedir}/src/test/java/org/apache/hadoop/fs">
+                    <include name="test-untar.tar"/>
+                    <include name="test-untar.tgz"/>
+                  </fileset>
+                </copy>
+              </target>
+            </configuration>
+          </execution>
           <execution>
             <phase>pre-site</phase>
             <goals>
@@ -485,6 +507,7 @@
             <exclude>src/test/all-tests</exclude>
             <exclude>src/test/resources/kdc/ldif/users.ldif</exclude>
             <exclude>src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4.c</exclude>
+            <exclude>src/test/java/org/apache/hadoop/fs/test-untar.tgz</exclude>
           </excludes>
         </configuration>
       </plugin>
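Note on the new dependency (not part of the patch): the commons-compress 1.4 artifact added above supplies the TarArchiveEntry and TarArchiveInputStream classes that the FileUtil change below builds on. As a minimal sketch of that API, listing a tar archive's entries looks like this; the class name and the archive path argument are made up for illustration:

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;

public class TarListing {
  public static void main(String[] args) throws IOException {
    // args[0] is a path to any tar archive; wrap the stream in a
    // GZIPInputStream first for a .tgz, as the patch does below.
    TarArchiveInputStream tis = new TarArchiveInputStream(
        new BufferedInputStream(new FileInputStream(args[0])));
    try {
      // getNextTarEntry() returns null once the archive is exhausted.
      for (TarArchiveEntry e = tis.getNextTarEntry(); e != null;
          e = tis.getNextTarEntry()) {
        System.out.println(e.getName() + " (" + e.getSize() + " bytes)");
      }
    } finally {
      tis.close();
    }
  }
}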
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java
index 4593eedb9f..19c19cd2b6 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java
@@ -21,9 +21,12 @@
 import java.io.*;
 import java.util.Arrays;
 import java.util.Enumeration;
+import java.util.zip.GZIPInputStream;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipFile;
 
+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
@@ -624,14 +627,28 @@ public static void unZip(File inFile, File unzipDir) throws IOException {
    * @throws IOException
    */
   public static void unTar(File inFile, File untarDir) throws IOException {
-    if (!untarDir.mkdirs()) {
+    if (!untarDir.mkdirs()) {
       if (!untarDir.isDirectory()) {
         throw new IOException("Mkdirs failed to create " + untarDir);
       }
     }
 
-    StringBuilder untarCommand = new StringBuilder();
     boolean gzipped = inFile.toString().endsWith("gz");
+    if(Shell.WINDOWS) {
+      // Tar is not native to Windows. Use simple Java based implementation for
+      // tests and simple tar archives
+      unTarUsingJava(inFile, untarDir, gzipped);
+    }
+    else {
+      // spawn tar utility to untar archive for full fledged unix behavior such
+      // as resolving symlinks in tar archives
+      unTarUsingTar(inFile, untarDir, gzipped);
+    }
+  }
+
+  private static void unTarUsingTar(File inFile, File untarDir,
+      boolean gzipped) throws IOException {
+    StringBuffer untarCommand = new StringBuffer();
     if (gzipped) {
       untarCommand.append(" gzip -dc '");
       untarCommand.append(FileUtil.makeShellPath(inFile));
@@ -656,7 +673,62 @@ public static void unTar(File inFile, File untarDir) throws IOException {
       ". Tar process exited with exit code " + exitcode);
     }
   }
+
+  private static void unTarUsingJava(File inFile, File untarDir,
+      boolean gzipped) throws IOException {
+    InputStream inputStream = null;
+    if (gzipped) {
+      inputStream = new BufferedInputStream(new GZIPInputStream(
+          new FileInputStream(inFile)));
+    } else {
+      inputStream = new BufferedInputStream(new FileInputStream(inFile));
+    }
+    TarArchiveInputStream tis = new TarArchiveInputStream(inputStream);
+
+    for (TarArchiveEntry entry = tis.getNextTarEntry(); entry != null;) {
+      unpackEntries(tis, entry, untarDir);
+      entry = tis.getNextTarEntry();
+    }
+  }
+
+  private static void unpackEntries(TarArchiveInputStream tis,
+      TarArchiveEntry entry, File outputDir) throws IOException {
+    if (entry.isDirectory()) {
+      File subDir = new File(outputDir, entry.getName());
+      if (!subDir.mkdir() && !subDir.isDirectory()) {
+        throw new IOException("Mkdirs failed to create tar internal dir "
+            + outputDir);
+      }
+
+      for (TarArchiveEntry e : entry.getDirectoryEntries()) {
+        unpackEntries(tis, e, subDir);
+      }
+
+      return;
+    }
+
+    File outputFile = new File(outputDir, entry.getName());
+    if (!outputDir.exists()) {
+      if (!outputDir.mkdirs()) {
+        throw new IOException("Mkdirs failed to create tar internal dir "
+            + outputDir);
+      }
+    }
+
+    int count;
+    byte data[] = new byte[2048];
+    BufferedOutputStream outputStream = new BufferedOutputStream(
+        new FileOutputStream(outputFile));
+
+    while ((count = tis.read(data)) != -1) {
+      outputStream.write(data, 0, count);
+    }
+
+    outputStream.flush();
+    outputStream.close();
+  }
+
   /**
    * Class for creating hardlinks.
    * Supports Unix, Cygwin, WindXP.
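Note on the FileUtil change (not part of the patch): unTar() now dispatches on Shell.WINDOWS, taking the pure-Java commons-compress path on Windows while still shelling out to tar elsewhere so that full Unix semantics such as symlink resolution are preserved. A hedged usage sketch of the resulting behavior; FileUtil.unTar is the real entry point exercised by the test below, but the file paths here are hypothetical:

import java.io.File;
import java.io.IOException;

import org.apache.hadoop.fs.FileUtil;

public class UntarExample {
  public static void main(String[] args) throws IOException {
    File archive = new File("/tmp/example.tgz");    // hypothetical input
    File dest = new File("/tmp/example-untarred");  // hypothetical output
    // Gzip handling is keyed off the file name ending in "gz". On Windows
    // this runs the commons-compress path; elsewhere it spawns tar.
    FileUtil.unTar(archive, dest);
  }
}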
System.getProperty("test.cache.data", + "build/test/cache") + "/test-untar.tgz"; + String tarFileName = System.getProperty("test.cache.data", + "build/test/cache") + "/test-untar.tar"; + String dataDir = System.getProperty("test.build.data", "build/test/data"); + File untarDir = new File(dataDir, "untarDir"); + + doUntarAndVerify(new File(tarGzFileName), untarDir); + doUntarAndVerify(new File(tarFileName), untarDir); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/test-untar.tar b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/test-untar.tar new file mode 100644 index 0000000000..949e985c73 Binary files /dev/null and b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/test-untar.tar differ diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/test-untar.tgz b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/test-untar.tgz new file mode 100644 index 0000000000..9e9ef40f6f Binary files /dev/null and b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/test-untar.tgz differ