HDFS-9705. Refine the behaviour of getFileChecksum when length = 0. Contributed by Kai Zheng and SammiChen.

Committed by Andrew Wang on 2017-03-14 16:41:10 -07:00
parent 4c66a8d19b
commit cc1292e73a
4 changed files with 48 additions and 32 deletions

View File

@@ -1731,10 +1731,14 @@ public MD5MD5CRC32FileChecksum getFileChecksum(String src, long length)
     checkOpen();
     Preconditions.checkArgument(length >= 0);
-    LocatedBlocks blockLocations = getBlockLocations(src, length);
-    FileChecksumHelper.FileChecksumComputer maker;
-    ErasureCodingPolicy ecPolicy = blockLocations.getErasureCodingPolicy();
+    LocatedBlocks blockLocations = null;
+    FileChecksumHelper.FileChecksumComputer maker = null;
+    ErasureCodingPolicy ecPolicy = null;
+    if (length > 0) {
+      blockLocations = getBlockLocations(src, length);
+      ecPolicy = blockLocations.getErasureCodingPolicy();
+    }
     maker = ecPolicy != null ?
         new FileChecksumHelper.StripedFileNonStripedChecksumComputer(src,
             length, blockLocations, namenode, this, ecPolicy) :

View File

@@ -95,12 +95,14 @@ static abstract class FileChecksumComputer {
       this.client = client;
       this.remaining = length;
-      if (src.contains(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR_SEPARATOR)) {
-        this.remaining = Math.min(length, blockLocations.getFileLength());
-      }
-      this.locatedBlocks = blockLocations.getLocatedBlocks();
+      if (blockLocations != null) {
+        if (src.contains(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR_SEPARATOR)) {
+          this.remaining = Math.min(length, blockLocations.getFileLength());
+        }
+        this.locatedBlocks = blockLocations.getLocatedBlocks();
+      }
     }

     String getSrc() {
       return src;
@@ -203,10 +205,24 @@ void setLastRetriedIndex(int lastRetriedIndex) {
      * @throws IOException
      */
     void compute() throws IOException {
-      checksumBlocks();
-      fileChecksum = makeFinalResult();
+      /**
+       * request length is 0 or the file is empty, return one with the
+       * magic entry that matches what previous hdfs versions return.
+       */
+      if (locatedBlocks == null || locatedBlocks.isEmpty()) {
+        // Explicitly specified here in case the default DataOutputBuffer
+        // buffer length value is changed in future. This matters because the
+        // fixed value 32 has to be used to repeat the magic value for previous
+        // HDFS version.
+        final int lenOfZeroBytes = 32;
+        byte[] emptyBlockMd5 = new byte[lenOfZeroBytes];
+        MD5Hash fileMD5 = MD5Hash.digest(emptyBlockMd5);
+        fileChecksum = new MD5MD5CRC32GzipFileChecksum(0, 0, fileMD5);
+      } else {
+        checksumBlocks();
+        fileChecksum = makeFinalResult();
+      }
     }

     /**
      * Compute and aggregate block checksums block by block.
/** /**
* Compute and aggregate block checksums block by block. * Compute and aggregate block checksums block by block.
@@ -228,15 +244,7 @@ MD5MD5CRC32FileChecksum makeFinalResult() {
         return new MD5MD5CRC32CastagnoliFileChecksum(bytesPerCRC,
             crcPerBlock, fileMD5);
       default:
-        // If there is no block allocated for the file,
-        // return one with the magic entry that matches what previous
-        // hdfs versions return.
-        if (locatedBlocks.isEmpty()) {
-          return new MD5MD5CRC32GzipFileChecksum(0, 0, fileMD5);
-        }
-        // we should never get here since the validity was checked
-        // when getCrcType() was called above.
+        // we will get here when crcType is "NULL".
         return null;
       }
     }
@@ -412,7 +420,7 @@ && getCrcType() != ct) {
     }

     /**
-     * Striped file checksum computing.
+     * Non-striped checksum computing for striped files.
      */
     static class StripedFileNonStripedChecksumComputer
         extends FileChecksumComputer {

View File

@@ -66,7 +66,7 @@ private BlockChecksumHelper() {
   }

   /**
-   * The abstract base block checksum computer.
+   * The abstract block checksum computer.
    */
   static abstract class AbstractBlockChecksumComputer {
     private final DataNode datanode;
@@ -139,7 +139,7 @@ int getChecksumSize() {
   }

   /**
-   * The abstract base block checksum computer.
+   * The abstract base block checksum computer, mainly for replicated blocks.
    */
   static abstract class BlockChecksumComputer
       extends AbstractBlockChecksumComputer {

View File

@@ -1030,13 +1030,6 @@ public FileSystem run() throws Exception {
         out.close();
       }

-      // verify the magic val for zero byte files
-      {
-        final FileChecksum zeroChecksum = hdfs.getFileChecksum(zeroByteFile);
-        assertEquals(zeroChecksum.toString(),
-            "MD5-of-0MD5-of-0CRC32:70bc8f4b72a86921468bf8e8441dce51");
-      }
-
       //write another file
       final Path bar = new Path(dir, "bar" + n);
       {
@@ -1046,7 +1039,18 @@ public FileSystem run() throws Exception {
         out.close();
       }

-      { //verify checksum
+      {
+        final FileChecksum zeroChecksum = hdfs.getFileChecksum(zeroByteFile);
+        final String magicValue =
+            "MD5-of-0MD5-of-0CRC32:70bc8f4b72a86921468bf8e8441dce51";
+        // verify the magic val for zero byte files
+        assertEquals(magicValue, zeroChecksum.toString());
+
+        //verify checksums for empty file and 0 request length
+        final FileChecksum checksumWith0 = hdfs.getFileChecksum(bar, 0);
+        assertEquals(zeroChecksum, checksumWith0);
+
+        //verify checksum
         final FileChecksum barcs = hdfs.getFileChecksum(bar);
         final int barhashcode = barcs.hashCode();
         assertEquals(hdfsfoocs.hashCode(), barhashcode);