From 0f6aa9564cbe0812a8cab36d999e353269dd6bc9 Mon Sep 17 00:00:00 2001
From: Virajith Jalaparti
Date: Fri, 8 Dec 2017 10:07:40 -0800
Subject: [PATCH] HDFS-12905. [READ] Handle decommissioning and
 under-maintenance Datanodes with Provided storage.

---
 .../blockmanagement/ProvidedStorageMap.java  | 13 ++-
 .../TestNameNodeProvidedImplementation.java  | 95 +++++++++++++++++++
 2 files changed, 107 insertions(+), 1 deletion(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java
index 7fbc71a071..208ed3e352 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java
@@ -342,14 +342,25 @@ DatanodeDescriptor choose(DatanodeDescriptor client,
           return dn;
         }
       }
+      // prefer live nodes first.
+      DatanodeDescriptor dn = chooseRandomNode(excludedUUids, true);
+      if (dn == null) {
+        dn = chooseRandomNode(excludedUUids, false);
+      }
+      return dn;
+    }

+    private DatanodeDescriptor chooseRandomNode(Set<String> excludedUUids,
+        boolean preferLiveNodes) {
       Random r = new Random();
       for (int i = dnR.size() - 1; i >= 0; --i) {
         int pos = r.nextInt(i + 1);
         DatanodeDescriptor node = dnR.get(pos);
         String uuid = node.getDatanodeUuid();
         if (!excludedUUids.contains(uuid)) {
-          return node;
+          if (!preferLiveNodes || node.getAdminState() == AdminStates.NORMAL) {
+            return node;
+          }
         }
         Collections.swap(dnR, i, pos);
       }
diff --git a/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java
index d0572472b2..394e8d8a5f 100644
--- a/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java
+++ b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java
@@ -56,6 +56,7 @@
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStatistics;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
 import org.apache.hadoop.hdfs.server.blockmanagement.ProvidedStorageMap;
@@ -795,4 +796,98 @@ public void testInMemoryAliasMap() throws Exception {

     FileUtils.deleteDirectory(tempDirectory);
   }
+
+  private DatanodeDescriptor getDatanodeDescriptor(DatanodeManager dnm,
+      int dnIndex) throws Exception {
+    return dnm.getDatanode(cluster.getDataNodes().get(dnIndex).getDatanodeId());
+  }
+
+  private void startDecommission(FSNamesystem namesystem, DatanodeManager dnm,
+      int dnIndex) throws Exception {
+    namesystem.writeLock();
+    DatanodeDescriptor dnDesc = getDatanodeDescriptor(dnm, dnIndex);
+    dnm.getDatanodeAdminManager().startDecommission(dnDesc);
+    namesystem.writeUnlock();
+  }
+
+  private void startMaintenance(FSNamesystem namesystem, DatanodeManager dnm,
+      int dnIndex) throws Exception {
+    namesystem.writeLock();
+    DatanodeDescriptor dnDesc = getDatanodeDescriptor(dnm, dnIndex);
+    dnm.getDatanodeAdminManager().startMaintenance(dnDesc, Long.MAX_VALUE);
+    namesystem.writeUnlock();
+  }
+
+  private void stopMaintenance(FSNamesystem namesystem, DatanodeManager dnm,
+      int dnIndex) throws Exception {
+    namesystem.writeLock();
+    DatanodeDescriptor dnDesc = getDatanodeDescriptor(dnm, dnIndex);
+    dnm.getDatanodeAdminManager().stopMaintenance(dnDesc);
+    namesystem.writeUnlock();
+  }
+
+  @Test
+  public void testDatanodeLifeCycle() throws Exception {
+    createImage(new FSTreeWalk(NAMEPATH, conf), NNDIRPATH,
+        FixedBlockResolver.class);
+    startCluster(NNDIRPATH, 3,
+        new StorageType[] {StorageType.PROVIDED, StorageType.DISK},
+        null, false);
+
+    int fileIndex = numFiles - 1;
+
+    final BlockManager blockManager = cluster.getNamesystem().getBlockManager();
+    final DatanodeManager dnm = blockManager.getDatanodeManager();
+
+    // to start, all 3 DNs are live in ProvidedDatanodeDescriptor.
+    verifyFileLocation(fileIndex, 3);
+
+    // decommission the first DN; still get 3 replicas.
+    startDecommission(cluster.getNamesystem(), dnm, 0);
+    verifyFileLocation(fileIndex, 3);
+
+    // remains the same even after heartbeats.
+    cluster.triggerHeartbeats();
+    verifyFileLocation(fileIndex, 3);
+
+    // start maintenance for the 2nd DN; still get 3 replicas.
+    startMaintenance(cluster.getNamesystem(), dnm, 1);
+    verifyFileLocation(fileIndex, 3);
+
+    DataNode dn1 = cluster.getDataNodes().get(0);
+    DataNode dn2 = cluster.getDataNodes().get(1);
+
+    // stop the 1st DN while it is being decommissioned.
+    MiniDFSCluster.DataNodeProperties dn1Properties = cluster.stopDataNode(0);
+    BlockManagerTestUtil.noticeDeadDatanode(cluster.getNameNode(),
+        dn1.getDatanodeId().getXferAddr());
+
+    // get 2 locations.
+    verifyFileLocation(fileIndex, 2);
+
+    // stop dn2 while in maintenance.
+    MiniDFSCluster.DataNodeProperties dn2Properties = cluster.stopDataNode(1);
+    BlockManagerTestUtil.noticeDeadDatanode(cluster.getNameNode(),
+        dn2.getDatanodeId().getXferAddr());
+
+    // 2 valid locations will be found as blocks on nodes that die during
+    // maintenance are not marked for removal.
+    verifyFileLocation(fileIndex, 2);
+
+    // stop the maintenance; get only 1 replica.
+    stopMaintenance(cluster.getNamesystem(), dnm, 0);
+    verifyFileLocation(fileIndex, 1);
+
+    // restart the stopped DN.
+    cluster.restartDataNode(dn1Properties, true);
+    cluster.waitActive();
+
+    // reports 2 replicas as dn2 is still down.
+    verifyFileLocation(fileIndex, 2);
+
+    cluster.restartDataNode(dn2Properties, true);
+    cluster.waitActive();
+    // reports all 3 replicas.
+    verifyFileLocation(fileIndex, 3);
+  }
 }
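
The core of the change above is the two-pass selection in ProvidedDescriptor.choose(): first try a random non-excluded datanode whose admin state is NORMAL, and only fall back to decommissioning or in-maintenance nodes when no live candidate remains. The following is a minimal, self-contained sketch of that policy, not part of the patch; the Node record, AdminState enum, and class name are hypothetical stand-ins for the Hadoop types (DatanodeDescriptor, DatanodeInfo.AdminStates) used in the real code.

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.Set;

public class LiveNodePreferenceSketch {

  // Hypothetical stand-ins for DatanodeDescriptor and its admin states.
  enum AdminState { NORMAL, DECOMMISSION_INPROGRESS, IN_MAINTENANCE }

  record Node(String uuid, AdminState adminState) { }

  // Two-pass selection mirroring the call pattern added to choose():
  // live (NORMAL) nodes first, then any non-excluded node.
  static Node choose(List<Node> nodes, Set<String> excludedUuids) {
    Node dn = chooseRandomNode(nodes, excludedUuids, true);
    if (dn == null) {
      dn = chooseRandomNode(nodes, excludedUuids, false);
    }
    return dn;
  }

  // Examine candidates in random order (partial Fisher-Yates) and return the
  // first one that is not excluded and, if requested, is in the NORMAL state.
  static Node chooseRandomNode(List<Node> nodes, Set<String> excludedUuids,
      boolean preferLiveNodes) {
    List<Node> candidates = new ArrayList<>(nodes);
    Random r = new Random();
    for (int i = candidates.size() - 1; i >= 0; --i) {
      int pos = r.nextInt(i + 1);
      Node node = candidates.get(pos);
      if (!excludedUuids.contains(node.uuid())
          && (!preferLiveNodes || node.adminState() == AdminState.NORMAL)) {
        return node;
      }
      // Move the rejected node out of the remaining selection window.
      Collections.swap(candidates, i, pos);
    }
    return null;
  }

  public static void main(String[] args) {
    List<Node> nodes = List.of(
        new Node("dn-0", AdminState.DECOMMISSION_INPROGRESS),
        new Node("dn-1", AdminState.IN_MAINTENANCE),
        new Node("dn-2", AdminState.NORMAL));
    // dn-2 is the only NORMAL node; once it is excluded, the fallback pass
    // still returns one of the non-live nodes instead of null.
    System.out.println(choose(nodes, Set.of("dn-2")));
  }
}

Keeping the admin-state check inside the same random walk means the common case (a live node exists) still costs a single pass; the second, unrestricted pass only runs when every non-excluded node is decommissioning or under maintenance, which matches the behavior exercised by testDatanodeLifeCycle.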