HADOOP-9209. Add shell command to dump file checksums (Todd Lipcon via jeagles)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1453613 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jonathan Turner Eagles 2013-03-06 23:17:14 +00:00
parent 85470f0a33
commit b427fe9de8
4 changed files with 86 additions and 1 deletions

View File

@ -1506,6 +1506,9 @@ Release 0.23.7 - UNRELEASED
HADOOP-9352. Expose UGI.setLoginUser for tests (daryn)
HADOOP-9209. Add shell command to dump file checksums (Todd Lipcon via
jeagles)
OPTIMIZATIONS
HADOOP-8462. Native-code implementation of bzip2 codec. (Govind Kamat via

View File

@ -36,6 +36,7 @@
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathIsDirectoryException;
@ -47,13 +48,14 @@
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils;
import org.codehaus.jackson.JsonEncoding;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonGenerator;
import org.codehaus.jackson.util.MinimalPrettyPrinter;
/**
* Display contents of files
* Display contents or checksums of files
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
@ -62,6 +64,7 @@ class Display extends FsCommand {
public static void registerCommands(CommandFactory factory) {
// Register each display command class with the factory under its
// dash-prefixed command name.
factory.addClass(Cat.class, "-cat");
factory.addClass(Text.class, "-text");
factory.addClass(Checksum.class, "-checksum");
}
/**
@ -162,6 +165,36 @@ protected InputStream getInputStream(PathData item) throws IOException {
}
}
/**
 * Prints checksum information for each path it is given.
 *
 * <p>For every file one tab-separated line is written to {@code out}:
 * the path, the checksum algorithm name and the checksum bytes rendered
 * as a hex string. When the file system reports no checksum for a file,
 * the literal {@code NONE} is printed in place of the algorithm name and
 * the checksum column is left empty. Directories are rejected.
 */
public static class Checksum extends Display {
  public static final String NAME = "checksum";
  public static final String USAGE = "<src> ...";
  public static final String DESCRIPTION =
      "Dump checksum information for files that match the file\n" +
      "pattern <src> to stdout. Note that this requires a round-trip\n" +
      "to a datanode storing each block of the file, and thus is not\n" +
      "efficient to run on a large number of files. The checksum of a\n" +
      "file depends on its content, block size and the checksum\n" +
      "algorithm and parameters used for creating the file.";

  /**
   * Writes the checksum line for a single path.
   *
   * @param item the path to report on
   * @throws PathIsDirectoryException if {@code item} is a directory
   * @throws IOException if the checksum cannot be obtained
   */
  @Override
  protected void processPath(PathData item) throws IOException {
    // Only regular files are reported; a directory is an error.
    if (item.stat.isDirectory()) {
      throw new PathIsDirectoryException(item.toString());
    }

    final FileChecksum sum = item.fs.getFileChecksum(item.path);
    if (sum != null) {
      final String hex =
          StringUtils.byteToHexString(sum.getBytes(), 0, sum.getLength());
      out.printf("%s\t%s\t%s\n", item.toString(), sum.getAlgorithmName(), hex);
      return;
    }

    // No checksum available: keep the three-column layout with an empty
    // trailing column.
    out.printf("%s\tNONE\t\n", item.toString());
  }
}
protected class TextRecordInputStream extends InputStream {
SequenceFile.Reader r;
Writable key;

View File

@ -486,6 +486,22 @@
</comparators>
</test>
<test> <!-- TESTED -->
<!-- Runs "-help checksum" and checks that the output contains a line that
     begins with the usage string "-checksum <src> ...:" followed, after
     optional spaces or tabs, by the start of the command description. -->
<description>help: help for checksum</description>
<test-commands>
<command>-help checksum</command>
</test-commands>
<cleanup-commands>
<!-- No cleanup -->
</cleanup-commands>
<comparators>
<comparator>
<type>RegexpComparator</type>
<expected-output>^-checksum &lt;src&gt; \.\.\.:( |\t)*Dump checksum information for files.*</expected-output>
</comparator>
</comparators>
</test>
<test> <!-- TESTED -->
<description>help: help for copyToLocal</description>
<test-commands>

View File

@ -5458,6 +5458,39 @@
</comparators>
</test>
<test> <!-- TESTED-->
<!-- Uploads four fixture files into the relative directory dir0, runs
     -checksum with the glob dir0/data*, and expects one output line per
     file made of three tab-separated fields: the path, the checksum
     algorithm name, and the checksum as a hex string. -->
<description>checksum: checksum of files(relative path) using globbing</description>
<test-commands>
<command>-fs NAMENODE -mkdir -p dir0</command>
<command>-fs NAMENODE -put CLITEST_DATA/data15bytes dir0/data15bytes</command>
<command>-fs NAMENODE -put CLITEST_DATA/data30bytes dir0/data30bytes</command>
<command>-fs NAMENODE -put CLITEST_DATA/data60bytes dir0/data60bytes</command>
<command>-fs NAMENODE -put CLITEST_DATA/data120bytes dir0/data120bytes</command>
<command>-fs NAMENODE -checksum dir0/data*</command>
</test-commands>
<cleanup-commands>
<!-- NOTE(review): cleanup removes /user rather than dir0; presumably the
     relative dir0 resolves beneath /user. Confirm against the test harness. -->
<command>-fs NAMENODE -rm -r /user</command>
</cleanup-commands>
<comparators>
<!-- One comparator per uploaded file; each regex is anchored only at the
     start of the line. -->
<comparator>
<type>RegexpComparator</type>
<expected-output>^dir0/data120bytes\tMD5-of-0MD5-of-512CRC32C\t000002000000000000000000a58cdc3c0967fc8cddb7fed5960d06f2</expected-output>
</comparator>
<comparator>
<type>RegexpComparator</type>
<expected-output>^dir0/data15bytes\tMD5-of-0MD5-of-512CRC32C\t0000020000000000000000007267e9528002723a30939aefc238d665</expected-output>
</comparator>
<comparator>
<type>RegexpComparator</type>
<expected-output>^dir0/data30bytes\tMD5-of-0MD5-of-512CRC32C\t000002000000000000000000fc09371298117c4943cf089b4bd79c96</expected-output>
</comparator>
<comparator>
<type>RegexpComparator</type>
<expected-output>^dir0/data60bytes\tMD5-of-0MD5-of-512CRC32C\t000002000000000000000000009476431d851dd7b0a8d057a404d7b9</expected-output>
</comparator>
</comparators>
</test>
<!-- Tests for moveToLocal -->
<!-- Not yet implemented -->