From 703158c9c66cd8975be9283d8c9d5a78b3d6dd2b Mon Sep 17 00:00:00 2001 From: Tom Date: Wed, 1 Mar 2023 16:44:30 -0800 Subject: [PATCH] =?UTF-8?q?HDFS-16896=20clear=20ignoredNodes=20list=20when?= =?UTF-8?q?=20we=20clear=20deadnode=20list=20on=20ref=E2=80=A6=20(#5322)?= =?UTF-8?q?=20(#5444)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cherry picked from: 162288bc0af944f116a4dd73e27f4676a204d9e9 Co-authored-by: Tom McCormick --- .../apache/hadoop/hdfs/DFSInputStream.java | 34 ++++++++++++++++--- .../TestDFSInputStreamBlockLocations.java | 23 +++++++++++++ .../org/apache/hadoop/hdfs/TestPread.java | 4 ++- 3 files changed, 55 insertions(+), 6 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java index 485b6b42ad..8a0bc95d13 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java @@ -224,7 +224,7 @@ boolean deadNodesContain(DatanodeInfo nodeInfo) { } /** - * Grab the open-file info from namenode + * Grab the open-file info from namenode. * @param refreshLocatedBlocks whether to re-fetch locatedblocks */ void openInfo(boolean refreshLocatedBlocks) throws IOException { @@ -940,7 +940,8 @@ private DNAddrPair chooseDataNode(LocatedBlock block, * @return Returns chosen DNAddrPair; Can be null if refetchIfRequired is * false. */ - private DNAddrPair chooseDataNode(LocatedBlock block, + @VisibleForTesting + DNAddrPair chooseDataNode(LocatedBlock block, Collection ignoredNodes, boolean refetchIfRequired) throws IOException { while (true) { @@ -955,6 +956,14 @@ private DNAddrPair chooseDataNode(LocatedBlock block, } } + /** + * RefetchLocations should only be called when there are no active requests + * to datanodes. In the hedged read case this means futures should be empty. + * @param block The locatedBlock to get new datanode locations for. + * @param ignoredNodes A list of ignored nodes. This list can be null and can be cleared. + * @return the locatedBlock with updated datanode locations. + * @throws IOException + */ private LocatedBlock refetchLocations(LocatedBlock block, Collection ignoredNodes) throws IOException { String errMsg = getBestNodeDNAddrPairErrorString(block.getLocations(), @@ -999,13 +1008,24 @@ private LocatedBlock refetchLocations(LocatedBlock block, throw new InterruptedIOException( "Interrupted while choosing DataNode for read."); } - clearLocalDeadNodes(); //2nd option is to remove only nodes[blockId] + clearCachedNodeState(ignoredNodes); openInfo(true); block = refreshLocatedBlock(block); failures++; return block; } + /** + * Clear both the dead nodes and the ignored nodes + * @param ignoredNodes is cleared + */ + private void clearCachedNodeState(Collection ignoredNodes) { + clearLocalDeadNodes(); //2nd option is to remove only nodes[blockId] + if (ignoredNodes != null) { + ignoredNodes.clear(); + } + } + /** * Get the best node from which to stream the data. * @param block LocatedBlock, containing nodes in priority order. @@ -1337,8 +1357,12 @@ private void hedgedFetchBlockByteRange(LocatedBlock block, long start, } catch (InterruptedException ie) { // Ignore and retry } - if (refetch) { - refetchLocations(block, ignored); + // If refetch is true, then all nodes are in deadNodes or ignoredNodes. + // We should loop through all futures and remove them, so we do not + // have concurrent requests to the same node. + // Once all futures are cleared, we can clear the ignoredNodes and retry. + if (refetch && futures.isEmpty()) { + block = refetchLocations(block, ignored); } // We got here if exception. Ignore this node on next go around IFF // we found a chosenNode to hedge read against. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSInputStreamBlockLocations.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSInputStreamBlockLocations.java index 50378f6038..2e4e496bc3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSInputStreamBlockLocations.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSInputStreamBlockLocations.java @@ -27,6 +27,7 @@ import java.io.IOException; import java.net.InetSocketAddress; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; @@ -35,11 +36,14 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; +import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.util.Time; import org.junit.After; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; @@ -200,6 +204,25 @@ public void testDeferredRegistrationGetAllBlocks() throws IOException { testWithRegistrationMethod(DFSInputStream::getAllBlocks); } + /** + * If the ignoreList contains all datanodes, the ignoredList should be cleared to take advantage + * of retries built into chooseDataNode. This is needed for hedged reads + * @throws IOException + */ + @Test + public void testClearIgnoreListChooseDataNode() throws IOException { + final String fileName = "/test_cache_locations"; + filePath = createFile(fileName); + + try (DFSInputStream fin = dfsClient.open(fileName)) { + LocatedBlocks existing = fin.locatedBlocks; + LocatedBlock block = existing.getLastLocatedBlock(); + ArrayList ignoreList = new ArrayList<>(Arrays.asList(block.getLocations())); + Assert.assertNotNull(fin.chooseDataNode(block, ignoreList, true)); + Assert.assertEquals(0, ignoreList.size()); + } + } + @FunctionalInterface interface ThrowingConsumer { void accept(DFSInputStream fin) throws IOException; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java index 8cc68147d7..5b4b18ede9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java @@ -603,7 +603,9 @@ public Void answer(InvocationOnMock invocation) throws Throwable { input.read(0, buffer, 0, 1024); Assert.fail("Reading the block should have thrown BlockMissingException"); } catch (BlockMissingException e) { - assertEquals(3, input.getHedgedReadOpsLoopNumForTesting()); + // The result of 9 is due to 2 blocks by 4 iterations plus one because + // hedgedReadOpsLoopNumForTesting is incremented at start of the loop. + assertEquals(9, input.getHedgedReadOpsLoopNumForTesting()); assertTrue(metrics.getHedgedReadOps() == 0); } finally { Mockito.reset(injector);