diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index 3b7b13dc72..a86f1326d0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -1701,14 +1701,14 @@ void initBlockPool(BPOfferService bpos) throws IOException { // the dataset, block scanners, etc. initStorage(nsInfo); - // Exclude failed disks before initializing the block pools to avoid startup - // failures. - checkDiskError(); try { data.addBlockPool(nsInfo.getBlockPoolID(), getConf()); } catch (AddBlockPoolException e) { handleAddBlockPoolError(e); } + // HDFS-14993: check disk after adding the block pool info. + checkDiskError(); + blockScanner.enableBlockPoolId(bpos.getBlockPoolId()); initDirectoryScanner(getConf()); initDiskBalancer(data, getConf()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java index 7ad012bf92..c4527514e7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java @@ -916,4 +916,35 @@ public boolean isSimulated() { } } } + + /* + * Verify the failed volume can be checked during dn startup + */ + @Test(timeout = 120000) + public void testVolumeFailureDuringStartup() throws Exception { + LOG.debug("Data dir: is " + dataDir.getPath()); + + // fail the volume + data_fail = cluster.getInstanceStorageDir(1, 0); + failedDir = MiniDFSCluster.getFinalizedDir(data_fail, + 
cluster.getNamesystem().getBlockPoolId()); + failedDir.setReadOnly(); + + // restart the dn + cluster.restartDataNode(1); + final DataNode dn = cluster.getDataNodes().get(1); + + // should get the failed volume during startup + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + return dn.getFSDataset() !=null && + dn.getFSDataset().getVolumeFailureSummary() != null && + dn.getFSDataset().getVolumeFailureSummary(). + getFailedStorageLocations()!= null && + dn.getFSDataset().getVolumeFailureSummary(). + getFailedStorageLocations().length == 1; + } + }, 10, 30 * 1000); + } }