HDFS-3429. DataNode reads checksums even if client does not need them. Contributed by Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1433117 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Todd Lipcon 2013-01-14 20:47:08 +00:00
parent 337e066bc3
commit 3052ad1f00
15 changed files with 101 additions and 42 deletions

View File

@ -482,6 +482,8 @@ Release 2.0.3-alpha - Unreleased
OPTIMIZATIONS OPTIMIZATIONS
HDFS-3429. DataNode reads checksums even if client does not need them (todd)
BUG FIXES BUG FIXES
HDFS-3919. MiniDFSCluster:waitClusterUp can hang forever. HDFS-3919. MiniDFSCluster:waitClusterUp can hang forever.

View File

@ -380,7 +380,8 @@ public static RemoteBlockReader newBlockReader( Socket sock, String file,
// in and out will be closed when sock is closed (by the caller) // in and out will be closed when sock is closed (by the caller)
final DataOutputStream out = new DataOutputStream(new BufferedOutputStream( final DataOutputStream out = new DataOutputStream(new BufferedOutputStream(
NetUtils.getOutputStream(sock, HdfsServerConstants.WRITE_TIMEOUT))); NetUtils.getOutputStream(sock, HdfsServerConstants.WRITE_TIMEOUT)));
new Sender(out).readBlock(block, blockToken, clientName, startOffset, len); new Sender(out).readBlock(block, blockToken, clientName, startOffset, len,
verifyChecksum);
// //
// Get bytes in block, set streams // Get bytes in block, set streams

View File

@ -392,7 +392,8 @@ public static BlockReader newBlockReader(Socket sock, String file,
// in and out will be closed when sock is closed (by the caller) // in and out will be closed when sock is closed (by the caller)
final DataOutputStream out = new DataOutputStream(new BufferedOutputStream( final DataOutputStream out = new DataOutputStream(new BufferedOutputStream(
ioStreams.out)); ioStreams.out));
new Sender(out).readBlock(block, blockToken, clientName, startOffset, len); new Sender(out).readBlock(block, blockToken, clientName, startOffset, len,
verifyChecksum);
// //
// Get bytes in block // Get bytes in block

View File

@ -55,12 +55,15 @@ public interface DataTransferProtocol {
* @param clientName client's name. * @param clientName client's name.
* @param blockOffset offset of the block. * @param blockOffset offset of the block.
* @param length maximum number of bytes for this read. * @param length maximum number of bytes for this read.
* @param sendChecksum if false, the DN should skip reading and sending
* checksums
*/ */
public void readBlock(final ExtendedBlock blk, public void readBlock(final ExtendedBlock blk,
final Token<BlockTokenIdentifier> blockToken, final Token<BlockTokenIdentifier> blockToken,
final String clientName, final String clientName,
final long blockOffset, final long blockOffset,
final long length) throws IOException; final long length,
final boolean sendChecksum) throws IOException;
/** /**
* Write a block to a datanode pipeline. * Write a block to a datanode pipeline.

View File

@ -88,7 +88,8 @@ private void opReadBlock() throws IOException {
PBHelper.convert(proto.getHeader().getBaseHeader().getToken()), PBHelper.convert(proto.getHeader().getBaseHeader().getToken()),
proto.getHeader().getClientName(), proto.getHeader().getClientName(),
proto.getOffset(), proto.getOffset(),
proto.getLen()); proto.getLen(),
proto.getSendChecksums());
} }
/** Receive OP_WRITE_BLOCK */ /** Receive OP_WRITE_BLOCK */

View File

@ -62,6 +62,10 @@ private static void op(final DataOutput out, final Op op
private static void send(final DataOutputStream out, final Op opcode, private static void send(final DataOutputStream out, final Op opcode,
final Message proto) throws IOException { final Message proto) throws IOException {
if (LOG.isTraceEnabled()) {
LOG.trace("Sending DataTransferOp " + proto.getClass().getSimpleName()
+ ": " + proto);
}
op(out, opcode); op(out, opcode);
proto.writeDelimitedTo(out); proto.writeDelimitedTo(out);
out.flush(); out.flush();
@ -72,12 +76,14 @@ public void readBlock(final ExtendedBlock blk,
final Token<BlockTokenIdentifier> blockToken, final Token<BlockTokenIdentifier> blockToken,
final String clientName, final String clientName,
final long blockOffset, final long blockOffset,
final long length) throws IOException { final long length,
final boolean sendChecksum) throws IOException {
OpReadBlockProto proto = OpReadBlockProto.newBuilder() OpReadBlockProto proto = OpReadBlockProto.newBuilder()
.setHeader(DataTransferProtoUtil.buildClientHeader(blk, clientName, blockToken)) .setHeader(DataTransferProtoUtil.buildClientHeader(blk, clientName, blockToken))
.setOffset(blockOffset) .setOffset(blockOffset)
.setLen(length) .setLen(length)
.setSendChecksums(sendChecksum)
.build(); .build();
send(out, Op.READ_BLOCK, proto); send(out, Op.READ_BLOCK, proto);

View File

@ -388,8 +388,8 @@ void verifyBlock(ExtendedBlock block) {
try { try {
adjustThrottler(); adjustThrottler();
blockSender = new BlockSender(block, 0, -1, false, true, datanode, blockSender = new BlockSender(block, 0, -1, false, true, true,
null); datanode, null);
DataOutputStream out = DataOutputStream out =
new DataOutputStream(new IOUtils.NullOutputStream()); new DataOutputStream(new IOUtils.NullOutputStream());

View File

@ -45,6 +45,8 @@
import org.apache.hadoop.net.SocketOutputStream; import org.apache.hadoop.net.SocketOutputStream;
import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.util.DataChecksum;
import com.google.common.base.Preconditions;
/** /**
* Reads a block from the disk and sends it to a recipient. * Reads a block from the disk and sends it to a recipient.
* *
@ -158,12 +160,14 @@ class BlockSender implements java.io.Closeable {
* @param length length of data to read * @param length length of data to read
* @param corruptChecksumOk * @param corruptChecksumOk
* @param verifyChecksum verify checksum while reading the data * @param verifyChecksum verify checksum while reading the data
* @param sendChecksum send checksum to client.
* @param datanode datanode from which the block is being read * @param datanode datanode from which the block is being read
* @param clientTraceFmt format string used to print client trace logs * @param clientTraceFmt format string used to print client trace logs
* @throws IOException * @throws IOException
*/ */
BlockSender(ExtendedBlock block, long startOffset, long length, BlockSender(ExtendedBlock block, long startOffset, long length,
boolean corruptChecksumOk, boolean verifyChecksum, boolean corruptChecksumOk, boolean verifyChecksum,
boolean sendChecksum,
DataNode datanode, String clientTraceFmt) DataNode datanode, String clientTraceFmt)
throws IOException { throws IOException {
try { try {
@ -175,6 +179,13 @@ class BlockSender implements java.io.Closeable {
this.shouldDropCacheBehindRead = datanode.getDnConf().dropCacheBehindReads; this.shouldDropCacheBehindRead = datanode.getDnConf().dropCacheBehindReads;
this.datanode = datanode; this.datanode = datanode;
if (verifyChecksum) {
// To simplify implementation, callers may not specify verification
// without sending.
Preconditions.checkArgument(sendChecksum,
"If verifying checksum, currently must also send it.");
}
final Replica replica; final Replica replica;
final long replicaVisibleLength; final long replicaVisibleLength;
synchronized(datanode.data) { synchronized(datanode.data) {
@ -213,29 +224,37 @@ class BlockSender implements java.io.Closeable {
* False, True: will verify checksum * False, True: will verify checksum
* False, False: throws IOException file not found * False, False: throws IOException file not found
*/ */
DataChecksum csum; DataChecksum csum = null;
final InputStream metaIn = datanode.data.getMetaDataInputStream(block); if (verifyChecksum || sendChecksum) {
if (!corruptChecksumOk || metaIn != null) { final InputStream metaIn = datanode.data.getMetaDataInputStream(block);
if (metaIn == null) { if (!corruptChecksumOk || metaIn != null) {
//need checksum but meta-data not found if (metaIn == null) {
throw new FileNotFoundException("Meta-data not found for " + block); //need checksum but meta-data not found
} throw new FileNotFoundException("Meta-data not found for " + block);
}
checksumIn = new DataInputStream(
new BufferedInputStream(metaIn, HdfsConstants.IO_FILE_BUFFER_SIZE));
// read and handle the common header here. For now just a version checksumIn = new DataInputStream(
BlockMetadataHeader header = BlockMetadataHeader.readHeader(checksumIn); new BufferedInputStream(metaIn, HdfsConstants.IO_FILE_BUFFER_SIZE));
short version = header.getVersion();
if (version != BlockMetadataHeader.VERSION) { // read and handle the common header here. For now just a version
LOG.warn("Wrong version (" + version + ") for metadata file for " BlockMetadataHeader header = BlockMetadataHeader.readHeader(checksumIn);
+ block + " ignoring ..."); short version = header.getVersion();
if (version != BlockMetadataHeader.VERSION) {
LOG.warn("Wrong version (" + version + ") for metadata file for "
+ block + " ignoring ...");
}
csum = header.getChecksum();
} else {
LOG.warn("Could not find metadata file for " + block);
} }
csum = header.getChecksum(); }
} else { if (csum == null) {
LOG.warn("Could not find metadata file for " + block); // The number of bytes per checksum here determines the alignment
// This only decides the buffer size. Use BUFFER_SIZE? // of reads: we always start reading at a checksum chunk boundary,
csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 16 * 1024); // even if the checksum type is NULL. So, choosing too big of a value
// would risk sending too much unnecessary data. 512 (1 disk sector)
// is likely to result in minimal extra IO.
csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 512);
} }
/* /*

View File

@ -1441,7 +1441,7 @@ public void run() {
HdfsConstants.SMALL_BUFFER_SIZE)); HdfsConstants.SMALL_BUFFER_SIZE));
in = new DataInputStream(unbufIn); in = new DataInputStream(unbufIn);
blockSender = new BlockSender(b, 0, b.getNumBytes(), blockSender = new BlockSender(b, 0, b.getNumBytes(),
false, false, DataNode.this, null); false, false, true, DataNode.this, null);
DatanodeInfo srcNode = new DatanodeInfo(bpReg); DatanodeInfo srcNode = new DatanodeInfo(bpReg);
// //

View File

@ -241,7 +241,8 @@ public void readBlock(final ExtendedBlock block,
final Token<BlockTokenIdentifier> blockToken, final Token<BlockTokenIdentifier> blockToken,
final String clientName, final String clientName,
final long blockOffset, final long blockOffset,
final long length) throws IOException { final long length,
final boolean sendChecksum) throws IOException {
previousOpClientName = clientName; previousOpClientName = clientName;
OutputStream baseStream = getOutputStream(); OutputStream baseStream = getOutputStream();
@ -266,7 +267,7 @@ public void readBlock(final ExtendedBlock block,
try { try {
try { try {
blockSender = new BlockSender(block, blockOffset, length, blockSender = new BlockSender(block, blockOffset, length,
true, false, datanode, clientTraceFmt); true, false, sendChecksum, datanode, clientTraceFmt);
} catch(IOException e) { } catch(IOException e) {
String msg = "opReadBlock " + block + " received exception " + e; String msg = "opReadBlock " + block + " received exception " + e;
LOG.info(msg); LOG.info(msg);
@ -654,7 +655,7 @@ public void copyBlock(final ExtendedBlock block,
try { try {
// check if the block exists or not // check if the block exists or not
blockSender = new BlockSender(block, 0, -1, false, false, datanode, blockSender = new BlockSender(block, 0, -1, false, false, true, datanode,
null); null);
// set up response stream // set up response stream

View File

@ -52,6 +52,7 @@ message OpReadBlockProto {
required ClientOperationHeaderProto header = 1; required ClientOperationHeaderProto header = 1;
required uint64 offset = 2; required uint64 offset = 2;
required uint64 len = 3; required uint64 len = 3;
optional bool sendChecksums = 4 [default = true];
} }

View File

@ -444,21 +444,21 @@ public void testDataTransferProtocol() throws IOException {
recvBuf.reset(); recvBuf.reset();
blk.setBlockId(blkid-1); blk.setBlockId(blkid-1);
sender.readBlock(blk, BlockTokenSecretManager.DUMMY_TOKEN, "cl", sender.readBlock(blk, BlockTokenSecretManager.DUMMY_TOKEN, "cl",
0L, fileLen); 0L, fileLen, true);
sendRecvData("Wrong block ID " + newBlockId + " for read", false); sendRecvData("Wrong block ID " + newBlockId + " for read", false);
// negative block start offset -1L // negative block start offset -1L
sendBuf.reset(); sendBuf.reset();
blk.setBlockId(blkid); blk.setBlockId(blkid);
sender.readBlock(blk, BlockTokenSecretManager.DUMMY_TOKEN, "cl", sender.readBlock(blk, BlockTokenSecretManager.DUMMY_TOKEN, "cl",
-1L, fileLen); -1L, fileLen, true);
sendRecvData("Negative start-offset for read for block " + sendRecvData("Negative start-offset for read for block " +
firstBlock.getBlockId(), false); firstBlock.getBlockId(), false);
// bad block start offset // bad block start offset
sendBuf.reset(); sendBuf.reset();
sender.readBlock(blk, BlockTokenSecretManager.DUMMY_TOKEN, "cl", sender.readBlock(blk, BlockTokenSecretManager.DUMMY_TOKEN, "cl",
fileLen, fileLen); fileLen, fileLen, true);
sendRecvData("Wrong start-offset for reading block " + sendRecvData("Wrong start-offset for reading block " +
firstBlock.getBlockId(), false); firstBlock.getBlockId(), false);
@ -475,7 +475,7 @@ public void testDataTransferProtocol() throws IOException {
sendBuf.reset(); sendBuf.reset();
sender.readBlock(blk, BlockTokenSecretManager.DUMMY_TOKEN, "cl", sender.readBlock(blk, BlockTokenSecretManager.DUMMY_TOKEN, "cl",
0L, -1L-random.nextInt(oneMil)); 0L, -1L-random.nextInt(oneMil), true);
sendRecvData("Negative length for reading block " + sendRecvData("Negative length for reading block " +
firstBlock.getBlockId(), false); firstBlock.getBlockId(), false);
@ -488,14 +488,14 @@ public void testDataTransferProtocol() throws IOException {
recvOut); recvOut);
sendBuf.reset(); sendBuf.reset();
sender.readBlock(blk, BlockTokenSecretManager.DUMMY_TOKEN, "cl", sender.readBlock(blk, BlockTokenSecretManager.DUMMY_TOKEN, "cl",
0L, fileLen+1); 0L, fileLen+1, true);
sendRecvData("Wrong length for reading block " + sendRecvData("Wrong length for reading block " +
firstBlock.getBlockId(), false); firstBlock.getBlockId(), false);
//At the end of all this, read the file to make sure that succeeds finally. //At the end of all this, read the file to make sure that succeeds finally.
sendBuf.reset(); sendBuf.reset();
sender.readBlock(blk, BlockTokenSecretManager.DUMMY_TOKEN, "cl", sender.readBlock(blk, BlockTokenSecretManager.DUMMY_TOKEN, "cl",
0L, fileLen); 0L, fileLen, true);
readFile(fileSys, file, fileLen); readFile(fileSys, file, fileLen);
} finally { } finally {
cluster.shutdown(); cluster.shutdown();

View File

@ -19,6 +19,9 @@
import java.io.IOException; import java.io.IOException;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtocol;
import org.apache.log4j.Level;
import org.junit.AfterClass; import org.junit.AfterClass;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
@ -56,4 +59,11 @@ public void testParallelReadByteBuffer() throws IOException {
public void testParallelReadMixed() throws IOException { public void testParallelReadMixed() throws IOException {
runTestWorkload(new MixedWorkloadHelper()); runTestWorkload(new MixedWorkloadHelper());
} }
@Test
public void testParallelNoChecksums() throws IOException {
verifyChecksums = false;
runTestWorkload(new MixedWorkloadHelper());
}
} }

View File

@ -46,6 +46,7 @@ public class TestParallelReadUtil {
static final int FILE_SIZE_K = 256; static final int FILE_SIZE_K = 256;
static Random rand = null; static Random rand = null;
static final int DEFAULT_REPLICATION_FACTOR = 2; static final int DEFAULT_REPLICATION_FACTOR = 2;
protected boolean verifyChecksums = true;
static { static {
// The client-trace log ends up causing a lot of blocking threads // The client-trace log ends up causing a lot of blocking threads
@ -317,7 +318,8 @@ boolean runParallelRead(int nFiles, int nWorkerEach, ReadWorkerHelper helper) th
testInfo.filepath = new Path("/TestParallelRead.dat." + i); testInfo.filepath = new Path("/TestParallelRead.dat." + i);
testInfo.authenticData = util.writeFile(testInfo.filepath, FILE_SIZE_K); testInfo.authenticData = util.writeFile(testInfo.filepath, FILE_SIZE_K);
testInfo.dis = dfsClient.open(testInfo.filepath.toString()); testInfo.dis = dfsClient.open(testInfo.filepath.toString(),
dfsClient.dfsClientConf.ioBufferSize, verifyChecksums);
for (int j = 0; j < nWorkerEach; ++j) { for (int j = 0; j < nWorkerEach; ++j) {
workers[nWorkers++] = new ReadWorker(testInfo, nWorkers, helper); workers[nWorkers++] = new ReadWorker(testInfo, nWorkers, helper);

View File

@ -24,11 +24,14 @@
import java.io.IOException; import java.io.IOException;
import java.util.Random; import java.util.Random;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtocol;
import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset; import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset;
import org.apache.log4j.Level;
import org.junit.Test; import org.junit.Test;
/** /**
@ -194,11 +197,19 @@ private void cleanupFile(FileSystem fileSys, Path name) throws IOException {
*/ */
@Test @Test
public void testPreadDFS() throws IOException { public void testPreadDFS() throws IOException {
dfsPreadTest(false); //normal pread dfsPreadTest(false, true); //normal pread
dfsPreadTest(true); //trigger read code path without transferTo. dfsPreadTest(true, true); //trigger read code path without transferTo.
} }
private void dfsPreadTest(boolean disableTransferTo) throws IOException { @Test
public void testPreadDFSNoChecksum() throws IOException {
((Log4JLogger)DataTransferProtocol.LOG).getLogger().setLevel(Level.ALL);
dfsPreadTest(false, false);
dfsPreadTest(true, false);
}
private void dfsPreadTest(boolean disableTransferTo, boolean verifyChecksum)
throws IOException {
Configuration conf = new HdfsConfiguration(); Configuration conf = new HdfsConfiguration();
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 4096); conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 4096);
conf.setLong(DFSConfigKeys.DFS_CLIENT_READ_PREFETCH_SIZE_KEY, 4096); conf.setLong(DFSConfigKeys.DFS_CLIENT_READ_PREFETCH_SIZE_KEY, 4096);
@ -210,6 +221,7 @@ private void dfsPreadTest(boolean disableTransferTo) throws IOException {
} }
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
FileSystem fileSys = cluster.getFileSystem(); FileSystem fileSys = cluster.getFileSystem();
fileSys.setVerifyChecksum(verifyChecksum);
try { try {
Path file1 = new Path("preadtest.dat"); Path file1 = new Path("preadtest.dat");
writeFile(fileSys, file1); writeFile(fileSys, file1);