HDFS-16841. Enhance the function of DebugAdmin#VerifyECCommand (#5137)
This commit is contained in:
parent
bcc3d2a20e
commit
ef84d21867
@ -432,8 +432,13 @@ private class VerifyECCommand extends DebugCommand {
|
||||
|
||||
VerifyECCommand() {
|
||||
super("verifyEC",
|
||||
"verifyEC -file <file>",
|
||||
" Verify HDFS erasure coding on all block groups of the file.");
|
||||
"verifyEC -file <file> [-blockId <blk_Id>] [-skipFailureBlocks]",
|
||||
" -file Verify HDFS erasure coding on all block groups of the file." +
|
||||
System.lineSeparator() +
|
||||
" -skipFailureBlocks specify will skip any block group failures during verify," +
|
||||
" and continues verify all block groups of the file," + System.lineSeparator() +
|
||||
" the default is not to skip failure blocks." + System.lineSeparator() +
|
||||
" -blockId specify blk_Id to verify for a specific one block group.");
|
||||
}
|
||||
|
||||
int run(List<String> args) throws IOException {
|
||||
@ -480,30 +485,48 @@ int run(List<String> args) throws IOException {
|
||||
this.parityBlkNum = ecPolicy.getNumParityUnits();
|
||||
this.cellSize = ecPolicy.getCellSize();
|
||||
this.encoder = CodecUtil.createRawEncoder(getConf(), ecPolicy.getCodecName(),
|
||||
new ErasureCoderOptions(
|
||||
ecPolicy.getNumDataUnits(), ecPolicy.getNumParityUnits()));
|
||||
new ErasureCoderOptions(dataBlkNum, parityBlkNum));
|
||||
int blockNum = dataBlkNum + parityBlkNum;
|
||||
this.readService = new ExecutorCompletionService<>(
|
||||
DFSUtilClient.getThreadPoolExecutor(blockNum, blockNum, 60,
|
||||
new LinkedBlockingQueue<>(), "read-", false));
|
||||
this.blockReaders = new BlockReader[dataBlkNum + parityBlkNum];
|
||||
this.blockReaders = new BlockReader[blockNum];
|
||||
|
||||
String needToVerifyBlockId = StringUtils.popOptionWithArgument("-blockId", args);
|
||||
boolean skipFailureBlocks = StringUtils.popOption("-skipFailureBlocks", args);
|
||||
boolean isHealthy = true;
|
||||
|
||||
for (LocatedBlock locatedBlock : locatedBlocks.getLocatedBlocks()) {
|
||||
System.out.println("Checking EC block group: blk_" + locatedBlock.getBlock().getBlockId());
|
||||
LocatedStripedBlock blockGroup = (LocatedStripedBlock) locatedBlock;
|
||||
String blockName = locatedBlock.getBlock().getBlockName();
|
||||
if (needToVerifyBlockId == null || needToVerifyBlockId.equals(blockName)) {
|
||||
System.out.println("Checking EC block group: " + blockName);
|
||||
LocatedStripedBlock blockGroup = (LocatedStripedBlock) locatedBlock;
|
||||
|
||||
try {
|
||||
verifyBlockGroup(blockGroup);
|
||||
System.out.println("Status: OK");
|
||||
} catch (Exception e) {
|
||||
System.err.println("Status: ERROR, message: " + e.getMessage());
|
||||
return 1;
|
||||
} finally {
|
||||
closeBlockReaders();
|
||||
try {
|
||||
verifyBlockGroup(blockGroup);
|
||||
System.out.println("Status: OK");
|
||||
} catch (Exception e) {
|
||||
System.err.println("Status: ERROR, message: " + e.getMessage());
|
||||
isHealthy = false;
|
||||
if (!skipFailureBlocks) {
|
||||
break;
|
||||
}
|
||||
} finally {
|
||||
closeBlockReaders();
|
||||
}
|
||||
|
||||
if (needToVerifyBlockId != null) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
System.out.println("\nAll EC block group status: OK");
|
||||
return 0;
|
||||
if (isHealthy) {
|
||||
if (needToVerifyBlockId == null) {
|
||||
System.out.println("\nAll EC block group status: OK");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
private void verifyBlockGroup(LocatedStripedBlock blockGroup) throws Exception {
|
||||
|
@ -194,8 +194,13 @@ public void testVerifyECCommand() throws Exception {
|
||||
cluster.waitActive();
|
||||
DistributedFileSystem fs = cluster.getFileSystem();
|
||||
|
||||
assertEquals("ret: 1, verifyEC -file <file> Verify HDFS erasure coding on " +
|
||||
"all block groups of the file.", runCmd(new String[]{"verifyEC"}));
|
||||
assertEquals("ret: 1, verifyEC -file <file> [-blockId <blk_Id>] " +
|
||||
"[-skipFailureBlocks] -file Verify HDFS erasure coding on all block groups of the file." +
|
||||
" -skipFailureBlocks specify will skip any block group failures during verify," +
|
||||
" and continues verify all block groups of the file," +
|
||||
" the default is not to skip failure blocks." +
|
||||
" -blockId specify blk_Id to verify for a specific one block group.",
|
||||
runCmd(new String[]{"verifyEC"}));
|
||||
|
||||
assertEquals("ret: 1, File /bar does not exist.",
|
||||
runCmd(new String[]{"verifyEC", "-file", "/bar"}));
|
||||
@ -270,6 +275,41 @@ public void testVerifyECCommand() throws Exception {
|
||||
"-out", metaFile.getAbsolutePath()});
|
||||
assertTrue(runCmd(new String[]{"verifyEC", "-file", "/ec/foo_corrupt"})
|
||||
.contains("Status: ERROR, message: EC compute result not match."));
|
||||
|
||||
// Specify -blockId.
|
||||
Path newFile = new Path(ecDir, "foo_new");
|
||||
DFSTestUtil.createFile(fs, newFile, (int) k, 6 * m, m, repl, seed);
|
||||
blocks = DFSTestUtil.getAllBlocks(fs, newFile);
|
||||
assertEquals(2, blocks.size());
|
||||
blockGroup = (LocatedStripedBlock) blocks.get(0);
|
||||
String blockName = blockGroup.getBlock().getBlockName();
|
||||
assertTrue(runCmd(new String[]{"verifyEC", "-file", "/ec/foo_new", "-blockId", blockName})
|
||||
.contains("ret: 0, Checking EC block group: " + blockName + "Status: OK"));
|
||||
|
||||
// Specify -skipFailureBlocks.
|
||||
indexedBlocks = StripedBlockUtil.parseStripedBlockGroup(blockGroup,
|
||||
ecPolicy.getCellSize(), ecPolicy.getNumDataUnits(), ecPolicy.getNumParityUnits());
|
||||
// Try to corrupt block 0 in the block group.
|
||||
toCorruptLocatedBlock = indexedBlocks[0];
|
||||
toCorruptBlock = toCorruptLocatedBlock.getBlock();
|
||||
datanode = cluster.getDataNode(toCorruptLocatedBlock.getLocations()[0].getIpcPort());
|
||||
blockFile = getBlockFile(datanode.getFSDataset(),
|
||||
toCorruptBlock.getBlockPoolId(), toCorruptBlock.getLocalBlock());
|
||||
metaFile = getMetaFile(datanode.getFSDataset(),
|
||||
toCorruptBlock.getBlockPoolId(), toCorruptBlock.getLocalBlock());
|
||||
metaFile.delete();
|
||||
// Write error bytes to block file and re-generate meta checksum.
|
||||
errorBytes = new byte[1048576];
|
||||
new Random(0x12345678L).nextBytes(errorBytes);
|
||||
FileUtils.writeByteArrayToFile(blockFile, errorBytes);
|
||||
runCmd(new String[]{"computeMeta", "-block", blockFile.getAbsolutePath(),
|
||||
"-out", metaFile.getAbsolutePath()});
|
||||
// VerifyEC and set skipFailureBlocks.
|
||||
LocatedStripedBlock blockGroup2 = (LocatedStripedBlock) blocks.get(1);
|
||||
assertTrue(runCmd(new String[]{"verifyEC", "-file", "/ec/foo_new", "-skipFailureBlocks"})
|
||||
.contains("ret: 1, Checking EC block group: " + blockGroup.getBlock().getBlockName() +
|
||||
"Status: ERROR, message: EC compute result not match." +
|
||||
"Checking EC block group: " + blockGroup2.getBlock().getBlockName() + "Status: OK"));
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user