From 7f7b05226e3ae1fdf3c440f8d26814f4d955f734 Mon Sep 17 00:00:00 2001 From: Robert Joseph Evans Date: Thu, 9 Feb 2012 16:41:44 +0000 Subject: [PATCH] HADOOP-8042. When copying a file out of HDFS, modifying it, and uploading it back into HDFS, the put fails due to a CRC mismatch (Daryn Sharp via bobby) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1242389 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 4 + .../apache/hadoop/fs/ChecksumFileSystem.java | 23 ++++- .../java/org/apache/hadoop/fs/FileSystem.java | 9 ++ .../apache/hadoop/fs/FilterFileSystem.java | 5 + .../fs/shell/CommandWithDestination.java | 14 ++- .../apache/hadoop/fs/shell/CopyCommands.java | 31 +----- .../org/apache/hadoop/fs/shell/PathData.java | 14 --- .../hadoop/fs/viewfs/ViewFileSystem.java | 9 ++ .../org/apache/hadoop/fs/TestFsShellCopy.java | 97 +++++++++++++++++++ 9 files changed, 159 insertions(+), 47 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 4cedb5a34f..9dd06f4429 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -162,6 +162,10 @@ Release 0.23.2 - UNRELEASED BUG FIXES + HADOOP-8042 When copying a file out of HDFS, modifying it, and uploading + it back into HDFS, the put fails due to a CRC mismatch + (Daryn Sharp via bobby) + Release 0.23.1 - 2012-02-08 INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java index e122f25dcd..17707718b8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java +++ 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java @@ -43,6 +43,7 @@ public abstract class ChecksumFileSystem extends FilterFileSystem { private static final byte[] CHECKSUM_VERSION = new byte[] {'c', 'r', 'c', 0}; private int bytesPerChecksum = 512; private boolean verifyChecksum = true; + private boolean writeChecksum = true; public static double getApproxChkSumLength(long size) { return ChecksumFSOutputSummer.CHKSUM_AS_FRACTION * size; @@ -67,6 +68,11 @@ public abstract class ChecksumFileSystem extends FilterFileSystem { this.verifyChecksum = verifyChecksum; } + @Override + public void setWriteChecksum(boolean writeChecksum) { + this.writeChecksum = writeChecksum; + } + /** get the raw file system */ public FileSystem getRawFileSystem() { return fs; @@ -428,9 +434,20 @@ public abstract class ChecksumFileSystem extends FilterFileSystem { throw new IOException("Mkdirs failed to create " + parent); } } - final FSDataOutputStream out = new FSDataOutputStream( - new ChecksumFSOutputSummer(this, f, overwrite, bufferSize, replication, - blockSize, progress), null); + final FSDataOutputStream out; + if (writeChecksum) { + out = new FSDataOutputStream( + new ChecksumFSOutputSummer(this, f, overwrite, bufferSize, replication, + blockSize, progress), null); + } else { + out = fs.create(f, permission, overwrite, bufferSize, replication, + blockSize, progress); + // remove the checksum file since we aren't writing one + Path checkFile = getChecksumFile(f); + if (fs.exists(checkFile)) { + fs.delete(checkFile, true); + } + } if (permission != null) { setPermission(f, permission); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java index 64f7c68a6d..8db9d25829 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java +++ 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java @@ -1936,6 +1936,15 @@ public abstract class FileSystem extends Configured implements Closeable { //doesn't do anything } + /** + * Set the write checksum flag. This is only applicable if the + * corresponding FileSystem supports checksum. By default doesn't do anything. + * @param writeChecksum whether checksum files should be written + */ + public void setWriteChecksum(boolean writeChecksum) { + //doesn't do anything + } + /** * Return a list of file status objects that corresponds to the list of paths * excluding those non-existent paths. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java index dde2520041..9a152cb6d7 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java @@ -360,6 +360,11 @@ public class FilterFileSystem extends FileSystem { public void setVerifyChecksum(boolean verifyChecksum) { fs.setVerifyChecksum(verifyChecksum); } + + @Override + public void setWriteChecksum(boolean writeChecksum) { + fs.setWriteChecksum(writeChecksum); + } @Override public Configuration getConf() { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandWithDestination.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandWithDestination.java index 6b3b40389f..8a0c91a6da 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandWithDestination.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandWithDestination.java @@ -41,7 +41,9 @@ import org.apache.hadoop.io.IOUtils; */ abstract class CommandWithDestination extends FsCommand { protected PathData dst; - protected boolean
overwrite = false; + private boolean overwrite = false; + private boolean verifyChecksum = true; + private boolean writeChecksum = true; /** * @@ -53,6 +55,14 @@ abstract class CommandWithDestination extends FsCommand { overwrite = flag; } + protected void setVerifyChecksum(boolean flag) { + verifyChecksum = flag; + } + + protected void setWriteChecksum(boolean flag) { + writeChecksum = flag; + } + /** * The last arg is expected to be a local path, if only one argument is * given then the destination will be the current directory @@ -201,6 +211,7 @@ abstract class CommandWithDestination extends FsCommand { * @throws IOException if copy fails */ protected void copyFileToTarget(PathData src, PathData target) throws IOException { + src.fs.setVerifyChecksum(verifyChecksum); copyStreamToTarget(src.fs.open(src.path), target); } @@ -217,6 +228,7 @@ abstract class CommandWithDestination extends FsCommand { if (target.exists && (target.stat.isDirectory() || !overwrite)) { throw new PathExistsException(target.toString()); } + target.fs.setWriteChecksum(writeChecksum); PathData tempFile = null; try { tempFile = target.createTempFile(target+"._COPYING_"); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java index 5260d9c80d..a42d6a83fd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java @@ -25,7 +25,6 @@ import java.util.List; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.fs.ChecksumFileSystem; import org.apache.hadoop.fs.FileUtil; /** Various commands for copy files */ @@ -103,43 +102,17 @@ class CopyCommands { "to the local name. is kept. 
When copying multiple,\n" + "files, the destination must be a directory."; - /** - * The prefix for the tmp file used in copyToLocal. - * It must be at least three characters long, required by - * {@link java.io.File#createTempFile(String, String, File)}. - */ - private boolean copyCrc; - private boolean verifyChecksum; - @Override protected void processOptions(LinkedList args) throws IOException { CommandFormat cf = new CommandFormat( 1, Integer.MAX_VALUE, "crc", "ignoreCrc"); cf.parse(args); - copyCrc = cf.getOpt("crc"); - verifyChecksum = !cf.getOpt("ignoreCrc"); - + setWriteChecksum(cf.getOpt("crc")); + setVerifyChecksum(!cf.getOpt("ignoreCrc")); setRecursive(true); getLocalDestination(args); } - - @Override - protected void copyFileToTarget(PathData src, PathData target) - throws IOException { - src.fs.setVerifyChecksum(verifyChecksum); - - if (copyCrc && !(src.fs instanceof ChecksumFileSystem)) { - displayWarning(src.fs + ": Does not support checksums"); - copyCrc = false; - } - - super.copyFileToTarget(src, target); - if (copyCrc) { - // should we delete real file if crc copy fails? 
- super.copyFileToTarget(src.getChecksumFile(), target.getChecksumFile()); - } - } } /** diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java index a3c88f1f2a..826f20685e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java @@ -27,7 +27,6 @@ import java.net.URISyntaxException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.ChecksumFileSystem; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; @@ -169,19 +168,6 @@ public class PathData { } } - /** - * Return the corresponding crc data for a file. Avoids exposing the fs - * contortions to the caller. - * @return PathData of the crc file - * @throws IOException is anything goes wrong - */ - public PathData getChecksumFile() throws IOException { - checkIfExists(FileTypeRequirement.SHOULD_NOT_BE_DIRECTORY); - ChecksumFileSystem srcFs = (ChecksumFileSystem)fs; - Path srcPath = srcFs.getChecksumFile(path); - return new PathData(srcFs.getRawFileSystem(), srcPath.toString()); - } - /** * Returns a temporary file for this PathData with the given extension. * The file will be deleted on exit. 
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java index 1addf2fd88..b3d19f7734 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java @@ -470,6 +470,15 @@ public class ViewFileSystem extends FileSystem { } } + @Override + public void setWriteChecksum(final boolean writeChecksum) { + List> mountPoints = + fsState.getMountPoints(); + for (InodeTree.MountPoint mount : mountPoints) { + mount.target.targetFileSystem.setWriteChecksum(writeChecksum); + } + } + public MountPoint[] getMountPoints() { List> mountPoints = fsState.getMountPoints(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java new file mode 100644 index 0000000000..997f9fa6ef --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java @@ -0,0 +1,97 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import static org.junit.Assert.*; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +public class TestFsShellCopy { + static Configuration conf; + static FsShell shell; + static LocalFileSystem lfs; + static Path testRootDir, srcPath, dstPath; + + @BeforeClass + public static void setup() throws Exception { + conf = new Configuration(); + shell = new FsShell(conf); + lfs = FileSystem.getLocal(conf); + testRootDir = new Path( + System.getProperty("test.build.data","test/build/data"), "testShellCopy"); + lfs.mkdirs(testRootDir); + srcPath = new Path(testRootDir, "srcFile"); + dstPath = new Path(testRootDir, "dstFile"); + } + + @Before + public void prepFiles() throws Exception { + lfs.setVerifyChecksum(true); + lfs.setWriteChecksum(true); + + lfs.delete(srcPath, true); + lfs.delete(dstPath, true); + FSDataOutputStream out = lfs.create(srcPath); + out.writeChars("hi"); + out.close(); + assertTrue(lfs.exists(lfs.getChecksumFile(srcPath))); + } + + @Test + public void testCopyNoCrc() throws Exception { + shellRun(0, "-get", srcPath.toString(), dstPath.toString()); + checkPath(dstPath, false); + } + + @Test + public void testCopyCrc() throws Exception { + shellRun(0, "-get", "-crc", srcPath.toString(), dstPath.toString()); + checkPath(dstPath, true); + } + + + @Test + public void testCorruptedCopyCrc() throws Exception { + FSDataOutputStream out = lfs.getRawFileSystem().create(srcPath); + out.writeChars("bang"); + out.close(); + shellRun(1, "-get", srcPath.toString(), dstPath.toString()); + } + + @Test + public void testCorruptedCopyIgnoreCrc() throws Exception { + shellRun(0, "-get", "-ignoreCrc", srcPath.toString(), dstPath.toString()); + checkPath(dstPath, false); + } + + private void checkPath(Path p, boolean 
expectChecksum) throws IOException { + assertTrue(lfs.exists(p)); + boolean hasChecksum = lfs.exists(lfs.getChecksumFile(p)); + assertEquals(expectChecksum, hasChecksum); + } + + private void shellRun(int n, String ... args) throws Exception { + assertEquals(n, shell.run(args)); + } +}