From 87c198468bb6a6312bbb27b174c18822b6b9ccf8 Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Tue, 28 Jan 2020 22:59:26 +0530 Subject: [PATCH] HDFS-14993. checkDiskError doesn't work during datanode startup. Contributed by Yang Yun. --- .../hadoop/hdfs/server/datanode/DataNode.java | 6 ++-- .../datanode/TestDataNodeVolumeFailure.java | 31 +++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index 3b7b13dc72..a86f1326d0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -1701,14 +1701,14 @@ void initBlockPool(BPOfferService bpos) throws IOException { // the dataset, block scanners, etc. initStorage(nsInfo); - // Exclude failed disks before initializing the block pools to avoid startup - // failures. - checkDiskError(); try { data.addBlockPool(nsInfo.getBlockPoolID(), getConf()); } catch (AddBlockPoolException e) { handleAddBlockPoolError(e); } + // HDFS-14993: check disks after adding the block pool info. 
+ checkDiskError(); + + blockScanner.enableBlockPoolId(bpos.getBlockPoolId()); initDirectoryScanner(getConf()); initDiskBalancer(data, getConf()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java index 7ad012bf92..c4527514e7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java @@ -916,4 +916,35 @@ public boolean isSimulated() { } } } + + /* + * Verify the failed volume can be checked during dn startup + */ + @Test(timeout = 120000) + public void testVolumeFailureDuringStartup() throws Exception { + LOG.debug("Data dir: is " + dataDir.getPath()); + + // fail the volume + data_fail = cluster.getInstanceStorageDir(1, 0); + failedDir = MiniDFSCluster.getFinalizedDir(data_fail, + cluster.getNamesystem().getBlockPoolId()); + failedDir.setReadOnly(); + + // restart the dn + cluster.restartDataNode(1); + final DataNode dn = cluster.getDataNodes().get(1); + + // should get the failed volume during startup + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + return dn.getFSDataset() !=null && + dn.getFSDataset().getVolumeFailureSummary() != null && + dn.getFSDataset().getVolumeFailureSummary(). + getFailedStorageLocations()!= null && + dn.getFSDataset().getVolumeFailureSummary(). + getFailedStorageLocations().length == 1; + } + }, 10, 30 * 1000); + } }