HDFS-10282. The VolumeScanner should warn about replica files which are misplaced. Contributed by Colin Patrick McCabe.

Kihwal Lee 2016-04-14 07:58:24 -05:00
parent df18b6e984
commit 0d1c1152f1
6 changed files with 111 additions and 8 deletions
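
In brief: the DirectoryScanner's misleading "has to be upgraded to block ID-based layout" warning becomes an accurate "found in invalid directory" message, the block iterator behind the VolumeScanner now logs and skips replicas that sit in the wrong subdirectory, and a new test exercises this via a makeUnreachable() hook added to the datanode test utilities.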

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java

@@ -914,13 +914,13 @@ private LinkedList<ScanInfo> compileReport(FsVolumeSpi vol,
    */
   private void verifyFileLocation(File actualBlockFile,
       File bpFinalizedDir, long blockId) {
-    File blockDir = DatanodeUtil.idToBlockDir(bpFinalizedDir, blockId);
-    if (actualBlockFile.getParentFile().compareTo(blockDir) != 0) {
-      File expBlockFile = new File(blockDir, actualBlockFile.getName());
-      LOG.warn("Block: " + blockId
-          + " has to be upgraded to block ID-based layout. "
-          + "Actual block file path: " + actualBlockFile
-          + ", expected block file path: " + expBlockFile);
+    File expectedBlockDir =
+        DatanodeUtil.idToBlockDir(bpFinalizedDir, blockId);
+    File actualBlockDir = actualBlockFile.getParentFile();
+    if (actualBlockDir.compareTo(expectedBlockDir) != 0) {
+      LOG.warn("Block: " + blockId +
+          " found in invalid directory. Expected directory: " +
+          expectedBlockDir + ". Actual directory: " + actualBlockDir);
     }
   }
 
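
Note: the "expected directory" here is a pure function of the block ID. A minimal sketch of the mapping performed by DatanodeUtil.idToBlockDir, assuming the two-level, 32-way block-ID-based layout used by recent Hadoop releases (the shift/mask constants have changed across versions, and the class name BlockLayoutSketch is a hypothetical stand-in, so treat this as illustrative rather than authoritative):

    import java.io.File;

    class BlockLayoutSketch {
      // Pick the two directory levels from bits 16-20 and 8-12 of the
      // block ID, giving at most 32 x 32 "subdirNN" directories under
      // the block pool's finalized/ directory.
      static File idToBlockDir(File bpFinalizedDir, long blockId) {
        int d1 = (int) ((blockId >> 16) & 0x1F);
        int d2 = (int) ((blockId >> 8) & 0x1F);
        return new File(bpFinalizedDir,
            "subdir" + d1 + File.separator + "subdir" + d2);
      }
    }

A replica is "misplaced" when the directory actually holding its block file differs from this computed directory, which is what verifyFileLocation now reports.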

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/VolumeScanner.java

@@ -414,7 +414,7 @@ private long scanBlock(ExtendedBlock cblock, long bytesPerSec) {
     Block b = volume.getDataset().getStoredBlock(
         cblock.getBlockPoolId(), cblock.getBlockId());
     if (b == null) {
-      LOG.info("FileNotFound while finding block {} on volume {}",
+      LOG.info("Replica {} was not found in the VolumeMap for volume {}",
           cblock, volume.getBasePath());
     } else {
       block = new ExtendedBlock(cblock.getBlockPoolId(), b);

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java

@@ -697,6 +697,18 @@ public ExtendedBlock nextBlock() throws IOException {
       } else {
         ExtendedBlock block =
             new ExtendedBlock(bpid, Block.filename2id(state.curEntry));
+        File expectedBlockDir = DatanodeUtil.idToBlockDir(
+            new File("."), block.getBlockId());
+        File actualBlockDir = Paths.get(".",
+            state.curFinalizedDir, state.curFinalizedSubDir).toFile();
+        if (!expectedBlockDir.equals(actualBlockDir)) {
+          LOG.error("nextBlock({}, {}): block id {} found in invalid " +
+              "directory. Expected directory: {}. " +
+              "Actual directory: {}", storageID, bpid,
+              block.getBlockId(), expectedBlockDir.getPath(),
+              actualBlockDir.getPath());
+          continue;
+        }
         LOG.trace("nextBlock({}, {}): advancing to {}",
             storageID, bpid, block);
         return block;
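
Both sides of the new check are built against a dummy root (".") so that only the relative subdirectory components are compared, independent of the volume's absolute path. A hypothetical fragment illustrating the comparison, using the BlockLayoutSketch helper sketched after the first hunk (values chosen for the assumed 32x32 layout):

    import java.io.File;
    import java.nio.file.Paths;

    // Block ID 0x10100: bits 16-20 select subdir1, bits 8-12 select subdir1.
    File expected = BlockLayoutSketch.idToBlockDir(new File("."), 0x10100L);
    File actual = Paths.get(".", "subdir1", "subdir1").toFile();
    // expected.equals(actual) -> true: replica is in place, iterator returns it.
    File misplaced = Paths.get(".", "subdir0", "subdir2").toFile();
    // expected.equals(misplaced) -> false: iterator logs an error and skips it.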

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/FsDatasetTestUtils.java

@@ -135,6 +135,13 @@ interface MaterializedReplica {
      * @throws IOException I/O error.
      */
     void truncateMeta(long newSize) throws IOException;
+
+    /**
+     * Make the replica unreachable, perhaps by renaming it to an
+     * invalid file name.
+     * @throws IOException On I/O error.
+     */
+    void makeUnreachable() throws IOException;
   }
 
   /**
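
The FsDatasetImpl-backed implementation of this hook appears in FsDatasetImplTestUtils.java at the end of this commit.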

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockScanner.java

@@ -24,6 +24,7 @@
 import static org.apache.hadoop.hdfs.server.datanode.BlockScanner.Conf.INTERNAL_DFS_BLOCK_SCANNER_CURSOR_SAVE_INTERVAL_MS;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertFalse;
 
 import java.io.Closeable;
 import java.io.File;
@@ -38,6 +39,7 @@
 import com.google.common.base.Supplier;
 import org.apache.hadoop.hdfs.MiniDFSNNTopology;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.server.datanode.FsDatasetTestUtils.MaterializedReplica;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
 import org.apache.hadoop.hdfs.server.datanode.VolumeScanner.ScanResultHandler;
 import org.apache.hadoop.conf.Configuration;
@@ -139,6 +141,11 @@ public ExtendedBlock getFileBlock(int nsIdx, int fileIdx)
         throws Exception {
       return DFSTestUtil.getFirstBlock(dfs[nsIdx], getPath(fileIdx));
     }
+
+    public MaterializedReplica getMaterializedReplica(int nsIdx, int fileIdx)
+        throws Exception {
+      return cluster.getMaterializedReplica(0, getFileBlock(nsIdx, fileIdx));
+    }
   }
 
   /**
@@ -806,4 +813,60 @@ public Boolean get() {
       info.blocksScanned = 0;
     }
   }
+
+  /**
+   * Test that blocks which are in the wrong location are ignored.
+   */
+  @Test(timeout=120000)
+  public void testIgnoreMisplacedBlock() throws Exception {
+    Configuration conf = new Configuration();
+    // Set a really long scan period.
+    conf.setLong(DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, 100L);
+    conf.set(INTERNAL_VOLUME_SCANNER_SCAN_RESULT_HANDLER,
+        TestScanResultHandler.class.getName());
+    conf.setLong(INTERNAL_DFS_BLOCK_SCANNER_CURSOR_SAVE_INTERVAL_MS, 0L);
+    final TestContext ctx = new TestContext(conf, 1);
+    final int NUM_FILES = 4;
+    ctx.createFiles(0, NUM_FILES, 5);
+    MaterializedReplica unreachableReplica = ctx.getMaterializedReplica(0, 1);
+    ExtendedBlock unreachableBlock = ctx.getFileBlock(0, 1);
+    unreachableReplica.makeUnreachable();
+    final TestScanResultHandler.Info info =
+        TestScanResultHandler.getInfo(ctx.volumes.get(0));
+    String storageID = ctx.volumes.get(0).getStorageID();
+    synchronized (info) {
+      info.sem = new Semaphore(NUM_FILES);
+      info.shouldRun = true;
+      info.notify();
+    }
+    // Scan the first 4 blocks
+    LOG.info("Waiting for the blocks to be scanned.");
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        synchronized (info) {
+          if (info.blocksScanned >= NUM_FILES - 1) {
+            LOG.info("info = {}. blocksScanned has now reached " +
+                info.blocksScanned, info);
+            return true;
+          } else {
+            LOG.info("info = {}. Waiting for blocksScanned to reach " +
+                (NUM_FILES - 1), info);
+            return false;
+          }
+        }
+      }
+    }, 50, 30000);
+    // We should have scanned the 3 remaining blocks; the misplaced one is skipped.
+    synchronized (info) {
+      assertFalse(info.goodBlocks.contains(unreachableBlock));
+      assertFalse(info.badBlocks.contains(unreachableBlock));
+      assertEquals("Expected 3 good blocks.", 3, info.goodBlocks.size());
+      info.goodBlocks.clear();
+      assertEquals("Expected 3 blocksScanned", 3, info.blocksScanned);
+      assertEquals("Did not expect bad blocks.", 0, info.badBlocks.size());
+      info.blocksScanned = 0;
+    }
+    info.sem.release(1);
+  }
 }
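
As a usage note: assuming a standard Hadoop dev checkout, this test should be runnable on its own from the hadoop-hdfs module with mvn test -Dtest=TestBlockScanner#testIgnoreMisplacedBlock.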

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImplTestUtils.java

@@ -170,6 +170,27 @@ public void truncateMeta(long newSize) throws IOException {
       truncate(metaFile, newSize);
     }
 
+    @Override
+    public void makeUnreachable() throws IOException {
+      long blockId = Block.getBlockId(blockFile.getAbsolutePath());
+      File origDir = blockFile.getParentFile();
+      File root = origDir.getParentFile().getParentFile();
+      File newDir = null;
+      // Keep incrementing the block ID until the block and metadata
+      // files end up in a different directory. Actually, with the
+      // current replica file placement scheme, this should only ever
+      // require one increment, but this is a bit of defensive coding.
+      do {
+        blockId++;
+        newDir = DatanodeUtil.idToBlockDir(root, blockId);
+      } while (origDir.equals(newDir));
+      Files.createDirectories(newDir.toPath());
+      Files.move(blockFile.toPath(),
+          new File(newDir, blockFile.getName()).toPath());
+      Files.move(metaFile.toPath(),
+          new File(newDir, metaFile.getName()).toPath());
+    }
+
     @Override
     public String toString() {
       return String.format("MaterializedReplica: file=%s", blockFile);
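
A side note on the do/while loop: only bits 8 and above of the block ID select the directory, so a single increment does not always change it, which is why the loop is there. A hypothetical fragment, again using the BlockLayoutSketch helper from earlier and an illustrative root path:

    File root = new File("/data/dn/current/finalized"); // illustrative path
    BlockLayoutSketch.idToBlockDir(root, 0x100FFL); // .../subdir1/subdir0
    BlockLayoutSketch.idToBlockDir(root, 0x10100L); // .../subdir1/subdir1
                                                    // (one increment moved it)
    BlockLayoutSketch.idToBlockDir(root, 0x10001L); // .../subdir1/subdir0
                                                    // (unchanged; the loop
                                                    // keeps incrementing)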