From b427fe9de879f178a4adec2931b7d5f324ffc764 Mon Sep 17 00:00:00 2001
From: Jonathan Turner Eagles <jeagles@apache.org>
Date: Wed, 6 Mar 2013 23:17:14 +0000
Subject: [PATCH] HADOOP-9209. Add shell command to dump file checksums (Todd
 Lipcon via jeagles)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1453613 13f79535-47bb-0310-9956-ffa450edef68
---
 .../hadoop-common/CHANGES.txt                 |  3 ++
 .../org/apache/hadoop/fs/shell/Display.java   | 35 ++++++++++++++++++-
 .../src/test/resources/testConf.xml           | 16 +++++++++
 .../src/test/resources/testHDFSConf.xml       | 33 +++++++++++++++++
 4 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 03f2267c79..1a381c980a 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -1506,6 +1506,9 @@ Release 0.23.7 - UNRELEASED
 
     HADOOP-9352. Expose UGI.setLoginUser for tests (daryn)
 
+    HADOOP-9209. Add shell command to dump file checksums (Todd Lipcon via
+    jeagles)
+
   OPTIMIZATIONS
 
     HADOOP-8462. Native-code implementation of bzip2 codec. (Govind Kamat via
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Display.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Display.java
index 4c31c0eaad..eb8a8cfca2 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Display.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Display.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileChecksum;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathIsDirectoryException;
@@ -47,13 +48,14 @@
 import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.io.compress.CompressionCodecFactory;
 import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.hadoop.util.StringUtils;
 import org.codehaus.jackson.JsonEncoding;
 import org.codehaus.jackson.JsonFactory;
 import org.codehaus.jackson.JsonGenerator;
 import org.codehaus.jackson.util.MinimalPrettyPrinter;
 
 /**
- * Display contents of files
+ * Display contents or checksums of files
  */
 @InterfaceAudience.Private
 @InterfaceStability.Evolving
@@ -62,6 +64,7 @@ class Display extends FsCommand {
   public static void registerCommands(CommandFactory factory) {
     factory.addClass(Cat.class, "-cat");
     factory.addClass(Text.class, "-text");
+    factory.addClass(Checksum.class, "-checksum");
   }
 
   /**
@@ -161,6 +164,36 @@ protected InputStream getInputStream(PathData item) throws IOException {
       return i;
     }
   }
+
+  public static class Checksum extends Display {
+    public static final String NAME = "checksum";
+    public static final String USAGE = "<src> ...";
+    public static final String DESCRIPTION =
+      "Dump checksum information for files that match the file\n" +
+      "pattern <src> to stdout. Note that this requires a round-trip\n" +
+      "to a datanode storing each block of the file, and thus is not\n" +
+      "efficient to run on a large number of files. The checksum of a\n" +
+      "file depends on its content, block size and the checksum\n" +
+      "algorithm and parameters used for creating the file.";
+
+    @Override
+    protected void processPath(PathData item) throws IOException {
+      if (item.stat.isDirectory()) {
+        throw new PathIsDirectoryException(item.toString());
+      }
+
+      FileChecksum checksum = item.fs.getFileChecksum(item.path);
+      if (checksum == null) {
+        out.printf("%s\tNONE\t\n", item.toString());
+      } else {
+        String checksumString = StringUtils.byteToHexString(
+            checksum.getBytes(), 0, checksum.getLength());
+        out.printf("%s\t%s\t%s\n",
+            item.toString(), checksum.getAlgorithmName(),
+            checksumString);
+      }
+    }
+  }
 
   protected class TextRecordInputStream extends InputStream {
     SequenceFile.Reader r;
diff --git a/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml b/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml
index 65a522b1b7..fd4e5d14fd 100644
--- a/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml
+++ b/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml
@@ -486,6 +486,22 @@
       </test>
 
+      <test>
+        <description>help: help for checksum</description>
+        <test-commands>
+          <command>-help checksum</command>
+        </test-commands>
+        <cleanup-commands>
+        </cleanup-commands>
+        <comparators>
+          <comparator>
+            <type>RegexpComparator</type>
+            <expected-output>^-checksum &lt;src&gt; \.\.\.:( |\t)*Dump checksum information for files.*</expected-output>
+          </comparator>
+        </comparators>
+      </test>
+
       <test>
         <description>help: help for copyToLocal</description>
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml
index 2fb10837fc..940c0f6f98 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml
@@ -5457,6 +5457,39 @@
       </test>
 
+      <test>
+        <description>checksum: checksum of files (relative path) using globbing</description>
+        <test-commands>
+          <command>-fs NAMENODE -mkdir -p dir0</command>
+          <command>-fs NAMENODE -put CLITEST_DATA/data15bytes dir0/data15bytes</command>
+          <command>-fs NAMENODE -put CLITEST_DATA/data30bytes dir0/data30bytes</command>
+          <command>-fs NAMENODE -put CLITEST_DATA/data60bytes dir0/data60bytes</command>
+          <command>-fs NAMENODE -put CLITEST_DATA/data120bytes dir0/data120bytes</command>
+          <command>-fs NAMENODE -checksum dir0/data*</command>
+        </test-commands>
+        <cleanup-commands>
+          <command>-fs NAMENODE -rm -r /user</command>
+        </cleanup-commands>
+        <comparators>
+          <comparator>
+            <type>RegexpComparator</type>
+            <expected-output>^dir0/data120bytes\tMD5-of-0MD5-of-512CRC32C\t000002000000000000000000a58cdc3c0967fc8cddb7fed5960d06f2</expected-output>
+          </comparator>
+          <comparator>
+            <type>RegexpComparator</type>
+            <expected-output>^dir0/data15bytes\tMD5-of-0MD5-of-512CRC32C\t0000020000000000000000007267e9528002723a30939aefc238d665</expected-output>
+          </comparator>
+          <comparator>
+            <type>RegexpComparator</type>
+            <expected-output>^dir0/data30bytes\tMD5-of-0MD5-of-512CRC32C\t000002000000000000000000fc09371298117c4943cf089b4bd79c96</expected-output>
+          </comparator>
+          <comparator>
+            <type>RegexpComparator</type>
+            <expected-output>^dir0/data60bytes\tMD5-of-0MD5-of-512CRC32C\t000002000000000000000000009476431d851dd7b0a8d057a404d7b9</expected-output>
+          </comparator>
+        </comparators>
+      </test>
+
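Reviewer note: the new Checksum command is a thin wrapper over the public
FileSystem#getFileChecksum(Path) API, so the same output can be reproduced
programmatically. A minimal standalone sketch of that logic follows; the
class name ChecksumExample and the single-argument handling are illustrative
assumptions, not part of this patch.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.StringUtils;

public class ChecksumExample {
  public static void main(String[] args) throws Exception {
    // Illustrative: expects one path argument; resolves the filesystem
    // from the path (falling back to fs.defaultFS for relative paths).
    Configuration conf = new Configuration();
    Path path = new Path(args[0]);
    FileSystem fs = path.getFileSystem(conf);

    // Same call the shell command makes; for HDFS this round-trips to a
    // datanode for each block of the file.
    FileChecksum checksum = fs.getFileChecksum(path);
    if (checksum == null) {
      // Filesystems without checksum support return null.
      System.out.printf("%s\tNONE%n", path);
    } else {
      System.out.printf("%s\t%s\t%s%n", path, checksum.getAlgorithmName(),
          StringUtils.byteToHexString(checksum.getBytes(), 0,
              checksum.getLength()));
    }
  }
}

Expected shell usage, matching the test expectations above:

  $ hadoop fs -checksum dir0/data15bytes
  dir0/data15bytes	MD5-of-0MD5-of-512CRC32C	0000020000000000000000007267e9528002723a30939aefc238d665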