HDFS-2525. Race between BlockPoolSliceScanner and append. Contributed by Brandon Li.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1244640 13f79535-47bb-0310-9956-ffa450edef68
commit f82e862e1f
parent 3bd230af11
@@ -210,6 +210,9 @@ Trunk (unreleased changes)
     dfs.client.block.write.replace-datanode-on-failure.enable to be mistakenly
     disabled. (atm)
 
+    HDFS-2525. Race between BlockPoolSliceScanner and append. (Brandon Li
+    via jitendra)
+
 Release 0.23.2 - UNRELEASED
 
   INCOMPATIBLE CHANGES
@@ -51,11 +51,8 @@
 import org.apache.hadoop.io.IOUtils;
 
 /**
- * Performs two types of scanning:
- * <li> Gets block files from the data directories and reconciles the
- * difference between the blocks on the disk and in memory.</li>
- * <li> Scans the data directories for block files under a block pool
- * and verifies that the files are not corrupt</li>
+ * Scans the block files under a block pool and verifies that the
+ * files are not corrupt.
  * This keeps track of blocks and their last verification times.
  * Currently it does not modify the metadata for block.
  */
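The trimmed Javadoc above now states only what the class actually does: verify block files and remember when each block was last verified. As a rough illustration of that remaining contract, here is a minimal standalone sketch; the names (VerificationTracker, markVerified, leastRecentlyVerified) are hypothetical and are not part of BlockPoolSliceScanner's real API:

import java.util.HashMap;
import java.util.Map;

/**
 * Sketch only: "keeps track of blocks and their last verification times".
 * Hypothetical names, not BlockPoolSliceScanner's real fields or methods.
 */
class VerificationTracker {
  private final Map<Long, Long> lastVerifiedMs = new HashMap<Long, Long>();

  /** Record a successful verification of the given block id. */
  synchronized void markVerified(long blockId) {
    lastVerifiedMs.put(blockId, System.currentTimeMillis());
  }

  /** Forget a block that was deleted or re-finalized under append. */
  synchronized void forget(long blockId) {
    lastVerifiedMs.remove(blockId);
  }

  /** Return the id of the block verified longest ago, or -1 if none. */
  synchronized long leastRecentlyVerified() {
    long oldestId = -1;
    long oldestTs = Long.MAX_VALUE;
    for (Map.Entry<Long, Long> e : lastVerifiedMs.entrySet()) {
      if (e.getValue() < oldestTs) {
        oldestTs = e.getValue();
        oldestId = e.getKey();
      }
    }
    return oldestId;
  }
}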
@@ -430,6 +427,19 @@ private void verifyBlock(ExtendedBlock block) {
         return;
       }
 
+      // If the block exists, the exception may be due to a race with write:
+      // the BlockSender got an old block path in rbw. BlockReceiver moved
+      // the block from rbw to finalized, but BlockSender tried to open the
+      // file before BlockReceiver updated the VolumeMap. The state of the
+      // block can be changed again now, so ignore this error here. If a
+      // block really was deleted by mistake, DirectoryScan should catch it.
+      if (e instanceof FileNotFoundException) {
+        LOG.info("Verification failed for " + block +
+            ". It may be due to race with write.");
+        deleteBlock(block.getLocalBlock());
+        return;
+      }
+
       LOG.warn((second ? "Second " : "First ") + "Verification failed for "
           + block, e);
 
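This hunk is the heart of the fix. During an append, BlockReceiver moves a replica from rbw to finalized, so a concurrently running scan can hold a stale path and hit FileNotFoundException even though the block is healthy. The scanner now treats that specific failure as a probable race rather than a verification failure. Below is a self-contained sketch of the pattern; verifyChecksums and forgetBlock are hypothetical stand-ins for the real DataNode internals, not code from this commit:

import java.io.FileNotFoundException;
import java.io.IOException;

/** Sketch of the race-tolerant verification pattern; not the real code. */
class ScannerRaceSketch {

  /** Verify one block, treating a missing file as a probable append race. */
  static void verify(long blockId) {
    try {
      verifyChecksums(blockId);
    } catch (FileNotFoundException e) {
      // Probable rbw -> finalized move between path lookup and open:
      // drop the stale entry and let a later scan find the new path.
      forgetBlock(blockId);
    } catch (IOException e) {
      // Any other I/O error is still a genuine verification failure.
      System.err.println("Verification failed for block " + blockId + ": " + e);
    }
  }

  /** Hypothetical: open the block file and check data against checksums. */
  static void verifyChecksums(long blockId) throws IOException {
  }

  /** Hypothetical: remove the block from the scanner's tracking state. */
  static void forgetBlock(long blockId) {
  }
}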
@@ -47,12 +47,6 @@ public class TestAppendDifferentChecksum {
   public static void setupCluster() throws IOException {
     Configuration conf = new HdfsConfiguration();
     conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 4096);
-
-    // disable block scanner, since otherwise this test can trigger
-    // HDFS-2525, which is a different bug than we're trying to unit test
-    // here! When HDFS-2525 is fixed, this can be removed.
-    conf.setInt(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, -1);
-
     conf.set("fs.hdfs.impl.disable.cache", "true");
     cluster = new MiniDFSCluster.Builder(conf)
       .numDataNodes(1)
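Because the scanner no longer misreports this race, TestAppendDifferentChecksum can leave the block scanner enabled and the workaround above is removed. Purely as an illustration of the scenario that used to trigger HDFS-2525 (not code from this commit), a minimal append against a MiniDFSCluster with the scanner left at its default period might look like:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;

public class AppendWithScannerSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new HdfsConfiguration();
    conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1);
    // The block scanner is intentionally left at its default period;
    // before HDFS-2525 it could race with the append below.
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .numDataNodes(1).build();
    try {
      FileSystem fs = cluster.getFileSystem();
      Path file = new Path("/append-sketch");
      FSDataOutputStream out = fs.create(file);
      out.writeBytes("first half");
      out.close();
      out = fs.append(file);
      out.writeBytes(", second half");
      out.close();
    } finally {
      cluster.shutdown();
    }
  }
}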