HDFS-10282. The VolumeScanner should warn about replica files which are misplaced. Contributed by Colin Patrick McCabe.
This commit is contained in:
parent
df18b6e984
commit
0d1c1152f1
@ -914,13 +914,13 @@ private LinkedList<ScanInfo> compileReport(FsVolumeSpi vol,
|
|||||||
*/
|
*/
|
||||||
private void verifyFileLocation(File actualBlockFile,
|
private void verifyFileLocation(File actualBlockFile,
|
||||||
File bpFinalizedDir, long blockId) {
|
File bpFinalizedDir, long blockId) {
|
||||||
File blockDir = DatanodeUtil.idToBlockDir(bpFinalizedDir, blockId);
|
File expectedBlockDir =
|
||||||
if (actualBlockFile.getParentFile().compareTo(blockDir) != 0) {
|
DatanodeUtil.idToBlockDir(bpFinalizedDir, blockId);
|
||||||
File expBlockFile = new File(blockDir, actualBlockFile.getName());
|
File actualBlockDir = actualBlockFile.getParentFile();
|
||||||
LOG.warn("Block: " + blockId
|
if (actualBlockDir.compareTo(expectedBlockDir) != 0) {
|
||||||
+ " has to be upgraded to block ID-based layout. "
|
LOG.warn("Block: " + blockId +
|
||||||
+ "Actual block file path: " + actualBlockFile
|
" found in invalid directory. Expected directory: " +
|
||||||
+ ", expected block file path: " + expBlockFile);
|
expectedBlockDir + ". Actual directory: " + actualBlockDir);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -414,7 +414,7 @@ private long scanBlock(ExtendedBlock cblock, long bytesPerSec) {
|
|||||||
Block b = volume.getDataset().getStoredBlock(
|
Block b = volume.getDataset().getStoredBlock(
|
||||||
cblock.getBlockPoolId(), cblock.getBlockId());
|
cblock.getBlockPoolId(), cblock.getBlockId());
|
||||||
if (b == null) {
|
if (b == null) {
|
||||||
LOG.info("FileNotFound while finding block {} on volume {}",
|
LOG.info("Replica {} was not found in the VolumeMap for volume {}",
|
||||||
cblock, volume.getBasePath());
|
cblock, volume.getBasePath());
|
||||||
} else {
|
} else {
|
||||||
block = new ExtendedBlock(cblock.getBlockPoolId(), b);
|
block = new ExtendedBlock(cblock.getBlockPoolId(), b);
|
||||||
|
@ -697,6 +697,18 @@ public ExtendedBlock nextBlock() throws IOException {
|
|||||||
} else {
|
} else {
|
||||||
ExtendedBlock block =
|
ExtendedBlock block =
|
||||||
new ExtendedBlock(bpid, Block.filename2id(state.curEntry));
|
new ExtendedBlock(bpid, Block.filename2id(state.curEntry));
|
||||||
|
File expectedBlockDir = DatanodeUtil.idToBlockDir(
|
||||||
|
new File("."), block.getBlockId());
|
||||||
|
File actualBlockDir = Paths.get(".",
|
||||||
|
state.curFinalizedDir, state.curFinalizedSubDir).toFile();
|
||||||
|
if (!expectedBlockDir.equals(actualBlockDir)) {
|
||||||
|
LOG.error("nextBlock({}, {}): block id {} found in invalid " +
|
||||||
|
"directory. Expected directory: {}. " +
|
||||||
|
"Actual directory: {}", storageID, bpid,
|
||||||
|
block.getBlockId(), expectedBlockDir.getPath(),
|
||||||
|
actualBlockDir.getPath());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
LOG.trace("nextBlock({}, {}): advancing to {}",
|
LOG.trace("nextBlock({}, {}): advancing to {}",
|
||||||
storageID, bpid, block);
|
storageID, bpid, block);
|
||||||
return block;
|
return block;
|
||||||
|
@ -135,6 +135,13 @@ interface MaterializedReplica {
|
|||||||
* @throws IOException I/O error.
|
* @throws IOException I/O error.
|
||||||
*/
|
*/
|
||||||
void truncateMeta(long newSize) throws IOException;
|
void truncateMeta(long newSize) throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Make the replica unreachable, perhaps by renaming it to an
|
||||||
|
* invalid file name.
|
||||||
|
* @throws IOException On I/O error.
|
||||||
|
*/
|
||||||
|
void makeUnreachable() throws IOException;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -24,6 +24,7 @@
|
|||||||
import static org.apache.hadoop.hdfs.server.datanode.BlockScanner.Conf.INTERNAL_DFS_BLOCK_SCANNER_CURSOR_SAVE_INTERVAL_MS;
|
import static org.apache.hadoop.hdfs.server.datanode.BlockScanner.Conf.INTERNAL_DFS_BLOCK_SCANNER_CURSOR_SAVE_INTERVAL_MS;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
import static org.junit.Assert.assertFalse;
|
||||||
|
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
@ -38,6 +39,7 @@
|
|||||||
import com.google.common.base.Supplier;
|
import com.google.common.base.Supplier;
|
||||||
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
|
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||||
|
import org.apache.hadoop.hdfs.server.datanode.FsDatasetTestUtils.MaterializedReplica;
|
||||||
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
|
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
|
||||||
import org.apache.hadoop.hdfs.server.datanode.VolumeScanner.ScanResultHandler;
|
import org.apache.hadoop.hdfs.server.datanode.VolumeScanner.ScanResultHandler;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
@ -139,6 +141,11 @@ public ExtendedBlock getFileBlock(int nsIdx, int fileIdx)
|
|||||||
throws Exception {
|
throws Exception {
|
||||||
return DFSTestUtil.getFirstBlock(dfs[nsIdx], getPath(fileIdx));
|
return DFSTestUtil.getFirstBlock(dfs[nsIdx], getPath(fileIdx));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public MaterializedReplica getMaterializedReplica(int nsIdx, int fileIdx)
|
||||||
|
throws Exception {
|
||||||
|
return cluster.getMaterializedReplica(0, getFileBlock(nsIdx, fileIdx));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -806,4 +813,60 @@ public Boolean get() {
|
|||||||
info.blocksScanned = 0;
|
info.blocksScanned = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that blocks which are in the wrong location are ignored.
|
||||||
|
*/
|
||||||
|
@Test(timeout=120000)
|
||||||
|
public void testIgnoreMisplacedBlock() throws Exception {
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
// Set a really long scan period.
|
||||||
|
conf.setLong(DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, 100L);
|
||||||
|
conf.set(INTERNAL_VOLUME_SCANNER_SCAN_RESULT_HANDLER,
|
||||||
|
TestScanResultHandler.class.getName());
|
||||||
|
conf.setLong(INTERNAL_DFS_BLOCK_SCANNER_CURSOR_SAVE_INTERVAL_MS, 0L);
|
||||||
|
final TestContext ctx = new TestContext(conf, 1);
|
||||||
|
final int NUM_FILES = 4;
|
||||||
|
ctx.createFiles(0, NUM_FILES, 5);
|
||||||
|
MaterializedReplica unreachableReplica = ctx.getMaterializedReplica(0, 1);
|
||||||
|
ExtendedBlock unreachableBlock = ctx.getFileBlock(0, 1);
|
||||||
|
unreachableReplica.makeUnreachable();
|
||||||
|
final TestScanResultHandler.Info info =
|
||||||
|
TestScanResultHandler.getInfo(ctx.volumes.get(0));
|
||||||
|
String storageID = ctx.volumes.get(0).getStorageID();
|
||||||
|
synchronized (info) {
|
||||||
|
info.sem = new Semaphore(NUM_FILES);
|
||||||
|
info.shouldRun = true;
|
||||||
|
info.notify();
|
||||||
|
}
|
||||||
|
// Wait for 3 of the 4 blocks to be scanned; the misplaced one is skipped.
|
||||||
|
LOG.info("Waiting for the blocks to be scanned.");
|
||||||
|
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
||||||
|
@Override
|
||||||
|
public Boolean get() {
|
||||||
|
synchronized (info) {
|
||||||
|
if (info.blocksScanned >= NUM_FILES - 1) {
|
||||||
|
LOG.info("info = {}. blockScanned has now reached " +
|
||||||
|
info.blocksScanned, info);
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
LOG.info("info = {}. Waiting for blockScanned to reach " +
|
||||||
|
(NUM_FILES - 1), info);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}, 50, 30000);
|
||||||
|
// We should have scanned only 3 blocks; the misplaced block is ignored.
|
||||||
|
synchronized (info) {
|
||||||
|
assertFalse(info.goodBlocks.contains(unreachableBlock));
|
||||||
|
assertFalse(info.badBlocks.contains(unreachableBlock));
|
||||||
|
assertEquals("Expected 3 good blocks.", 3, info.goodBlocks.size());
|
||||||
|
info.goodBlocks.clear();
|
||||||
|
assertEquals("Expected 3 blocksScanned", 3, info.blocksScanned);
|
||||||
|
assertEquals("Did not expect bad blocks.", 0, info.badBlocks.size());
|
||||||
|
info.blocksScanned = 0;
|
||||||
|
}
|
||||||
|
info.sem.release(1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -170,6 +170,27 @@ public void truncateMeta(long newSize) throws IOException {
|
|||||||
truncate(metaFile, newSize);
|
truncate(metaFile, newSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void makeUnreachable() throws IOException {
|
||||||
|
long blockId = Block.getBlockId(blockFile.getAbsolutePath());
|
||||||
|
File origDir = blockFile.getParentFile();
|
||||||
|
File root = origDir.getParentFile().getParentFile();
|
||||||
|
File newDir = null;
|
||||||
|
// Keep incrementing the block ID until the block and metadata
|
||||||
|
// files end up in a different directory. Actually, with the
|
||||||
|
// current replica file placement scheme, this should only ever
|
||||||
|
// require one increment, but this is a bit of defensive coding.
|
||||||
|
do {
|
||||||
|
blockId++;
|
||||||
|
newDir = DatanodeUtil.idToBlockDir(root, blockId);
|
||||||
|
} while (origDir.equals(newDir));
|
||||||
|
Files.createDirectories(newDir.toPath());
|
||||||
|
Files.move(blockFile.toPath(),
|
||||||
|
new File(newDir, blockFile.getName()).toPath());
|
||||||
|
Files.move(metaFile.toPath(),
|
||||||
|
new File(newDir, metaFile.getName()).toPath());
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return String.format("MaterializedReplica: file=%s", blockFile);
|
return String.format("MaterializedReplica: file=%s", blockFile);
|
||||||
|
Loading…
Reference in New Issue
Block a user