HDFS-2525. Race between BlockPoolSliceScanner and append. Contributed by Brandon Li.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1244640 13f79535-47bb-0310-9956-ffa450edef68
Jitendra Nath Pandey 2012-02-15 18:43:01 +00:00
parent 3bd230af11
commit f82e862e1f
3 changed files with 18 additions and 11 deletions

CHANGES.txt

@@ -210,6 +210,9 @@ Trunk (unreleased changes)
     dfs.client.block.write.replace-datanode-on-failure.enable to be mistakenly
     disabled. (atm)
 
+    HDFS-2525. Race between BlockPoolSliceScanner and append. (Brandon Li
+    via jitendra)
+
 Release 0.23.2 - UNRELEASED
 
   INCOMPATIBLE CHANGES

BlockPoolSliceScanner.java

@@ -51,11 +51,8 @@
 import org.apache.hadoop.io.IOUtils;
 
 /**
- * Performs two types of scanning:
- * <li> Gets block files from the data directories and reconciles the
- * difference between the blocks on the disk and in memory.</li>
- * <li> Scans the data directories for block files under a block pool
- * and verifies that the files are not corrupt</li>
+ * Scans the block files under a block pool and verifies that the
+ * files are not corrupt.
  * This keeps track of blocks and their last verification times.
  * Currently it does not modify the metadata for block.
  */
@@ -430,6 +427,19 @@ private void verifyBlock(ExtendedBlock block) {
           return;
         }
 
+        // If the block exists, the exception may be due to a race with write:
+        // BlockSender got the old block path in rbw. BlockReceiver moved the
+        // block from rbw to finalized, but BlockSender tried to open the file
+        // before BlockReceiver updated the VolumeMap. The state of the block
+        // can change again now, so ignore this error here. If a block really
+        // was deleted by mistake, the DirectoryScanner should catch it.
+        if (e instanceof FileNotFoundException) {
+          LOG.info("Verification failed for " + block +
+              ". It may be due to a race with write.");
+          deleteBlock(block.getLocalBlock());
+          return;
+        }
+
         LOG.warn((second ? "Second " : "First ") + "Verification failed for "
             + block, e);
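The fix hinges on one observation: during an append, a replica's file can legitimately move from rbw/ to finalized/ after the scanner has resolved the old path, so a FileNotFoundException is not proof of corruption while the block is still tracked in memory. A minimal, self-contained sketch of that classification follows; BlockStore, verifyOnDisk, and blockId are hypothetical names for illustration, not HDFS APIs.

import java.io.FileNotFoundException;
import java.io.IOException;

/**
 * Sketch of the race-tolerant check added above: a FileNotFoundException
 * during verification is treated as a probable append/finalize race, not
 * as corruption, as long as the block is still tracked in memory.
 */
public class RaceTolerantVerifier {

  /** Stand-in for the DataNode's per-volume block bookkeeping. */
  interface BlockStore {
    boolean contains(long blockId);                      // still tracked in memory?
    void verifyOnDisk(long blockId) throws IOException;  // open file, verify checksums
  }

  private final BlockStore store;

  RaceTolerantVerifier(BlockStore store) {
    this.store = store;
  }

  /** Returns true if the block is clean or the failure looks like a benign race. */
  boolean verify(long blockId) {
    try {
      store.verifyOnDisk(blockId);
      return true;
    } catch (FileNotFoundException e) {
      // An append may have just moved the replica from rbw/ to finalized/
      // after we resolved the old path. If the block is still tracked,
      // skip this pass and let a later scan settle its state.
      return store.contains(blockId);
    } catch (IOException e) {
      return false;  // a genuine verification failure
    }
  }
}

The actual change additionally drops the block from the scanner's tracking via deleteBlock(block.getLocalBlock()) before returning, as shown in the hunk above.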

TestAppendDifferentChecksum.java

@@ -47,12 +47,6 @@ public class TestAppendDifferentChecksum {
   public static void setupCluster() throws IOException {
     Configuration conf = new HdfsConfiguration();
     conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 4096);
-
-    // disable block scanner, since otherwise this test can trigger
-    // HDFS-2525, which is a different bug than we're trying to unit test
-    // here! When HDFS-2525 is fixed, this can be removed.
-    conf.setInt(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, -1);
-
     conf.set("fs.hdfs.impl.disable.cache", "true");
     cluster = new MiniDFSCluster.Builder(conf)
       .numDataNodes(1)
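
For reference, the deleted lines used the standard test workaround: setting dfs.datanode.scan.period.hours to a negative value switches the DataNode block scanner off entirely, which kept this test from tripping over HDFS-2525 before the fix. A sketch of that now-unneeded setup, assuming a MiniDFSCluster-based test like the one above:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;

public class ScannerDisabledClusterSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new HdfsConfiguration();
    // A negative scan period disables the periodic block scanner; with
    // HDFS-2525 fixed, tests like TestAppendDifferentChecksum no longer
    // need this line.
    conf.setInt(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, -1);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .numDataNodes(1)
        .build();
    try {
      // ... exercise append/read paths here ...
    } finally {
      cluster.shutdown();
    }
  }
}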