From 1869e1771c7eeea46ccb822ce6f7081d994bb12c Mon Sep 17 00:00:00 2001
From: Brahma Reddy Battula
Date: Wed, 7 Jun 2017 13:05:03 +0800
Subject: [PATCH] HDFS-11711. DN should not delete the block On "Too many open
 files" Exception. Contributed by Brahma Reddy Battula.

---
 .../hdfs/server/datanode/BlockSender.java          | 13 ++--
 .../server/datanode/DataNodeFaultInjector.java     |  4 ++
 .../server/datanode/TestDataNodeMetrics.java       | 62 +++++++++++++++++++
 3 files changed, 75 insertions(+), 4 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
index 203ee35789..3ff5c75466 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
@@ -302,6 +302,7 @@ class BlockSender implements java.io.Closeable {
       LengthInputStream metaIn = null;
       boolean keepMetaInOpen = false;
       try {
+        DataNodeFaultInjector.get().throwTooManyOpenFiles();
         metaIn = datanode.data.getMetaDataInputStream(block);
         if (!corruptChecksumOk || metaIn != null) {
           if (metaIn == null) {
@@ -331,10 +332,14 @@
           LOG.warn("Could not find metadata file for " + block);
         }
       } catch (FileNotFoundException e) {
-        // The replica is on its volume map but not on disk
-        datanode.notifyNamenodeDeletedBlock(block, replica.getStorageUuid());
-        datanode.data.invalidate(block.getBlockPoolId(),
-            new Block[]{block.getLocalBlock()});
+        if ((e.getMessage() != null) && !(e.getMessage()
+            .contains("Too many open files"))) {
+          // The replica is on its volume map but not on disk
+          datanode
+              .notifyNamenodeDeletedBlock(block, replica.getStorageUuid());
+          datanode.data.invalidate(block.getBlockPoolId(),
+              new Block[] {block.getLocalBlock()});
+        }
         throw e;
       } finally {
         if (!keepMetaInOpen) {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeFaultInjector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeFaultInjector.java
index d2d557f1c4..0a2a60be44 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeFaultInjector.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeFaultInjector.java
@@ -21,6 +21,7 @@
 
 import org.apache.hadoop.classification.InterfaceAudience;
 
+import java.io.FileNotFoundException;
 import java.io.IOException;
 
 /**
@@ -85,4 +86,7 @@ public void failPipeline(ReplicaInPipeline replicaInfo,
   public void startOfferService() throws Exception {}
 
   public void endOfferService() throws Exception {}
+
+  public void throwTooManyOpenFiles() throws FileNotFoundException {
+  }
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java
index 9abc19dcbe..7b3dea70e5 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java
@@ -24,8 +24,10 @@
 import static org.junit.Assert.*;
 
 import java.io.Closeable;
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.lang.management.ManagementFactory;
+import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.List;
 
@@ -45,8 +47,10 @@
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.metrics2.MetricsRecordBuilder;
@@ -365,4 +369,62 @@ public void testDatanodeActiveXceiversCount() throws Exception {
       }
     }
   }
+
+  @Test
+  public void testDNShouldNotDeleteBlockONTooManyOpenFiles()
+      throws Exception {
+    Configuration conf = new HdfsConfiguration();
+    conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1L);
+    conf.setLong(HdfsClientConfigKeys.Retry.WINDOW_BASE_KEY, 1);
+    DataNodeFaultInjector oldInjector = DataNodeFaultInjector.get();
+    MiniDFSCluster cluster =
+        new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
+    final DataNodeFaultInjector injector =
+        Mockito.mock(DataNodeFaultInjector.class);
+    try {
+      // wait until the cluster is up
+      cluster.waitActive();
+      DistributedFileSystem fs = cluster.getFileSystem();
+      Path p = new Path("/testShouldThrowTMP");
+      DFSTestUtil.writeFile(fs, p, new String("testdata"));
+      //Before DN throws too many open files
+      verifyBlockLocations(fs, p, 1);
+      Mockito.doThrow(new FileNotFoundException("Too many open files")).
+          when(injector).
+          throwTooManyOpenFiles();
+      DataNodeFaultInjector.set(injector);
+      ExtendedBlock b =
+          fs.getClient().getLocatedBlocks(p.toString(), 0).get(0).getBlock();
+      try {
+        new BlockSender(b, 0, -1, false, true, true,
+            cluster.getDataNodes().get(0), null,
+            CachingStrategy.newDefaultStrategy());
+        fail("Must throw FileNotFoundException");
+      } catch (FileNotFoundException fe) {
+        assertTrue("Should throw too many open files",
+            fe.getMessage().contains("Too many open files"));
+      }
+      cluster.triggerHeartbeats(); // IBR delete ack
+      //After DN throws too many open files
+      assertTrue(cluster.getDataNodes().get(0).getFSDataset().isValidBlock(b));
+      verifyBlockLocations(fs, p, 1);
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+      DataNodeFaultInjector.set(oldInjector);
+    }
+  }
+
+  private void verifyBlockLocations(DistributedFileSystem fs, Path p,
+      int expected) throws IOException, TimeoutException, InterruptedException {
+    final LocatedBlock lb =
+        fs.getClient().getLocatedBlocks(p.toString(), 0).get(0);
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        return lb.getLocations().length == expected;
+      }
+    }, 1000, 6000);
+  }
 }
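
The essence of the change is that a FileNotFoundException whose message reports fd exhaustion must not be treated as a missing replica: the JDK surfaces EMFILE as FileNotFoundException("Too many open files"), and before this patch the DataNode would wrongly report the block deleted and invalidate it. Below is a minimal standalone sketch of that guard, outside the Hadoop tree; FnfeGuardSketch, ReplicaStore, openMeta, and markMissingAndInvalidate are hypothetical names for illustration, not HDFS APIs.

import java.io.FileNotFoundException;

public class FnfeGuardSketch {

  /** Hypothetical stand-in for the DataNode's replica bookkeeping. */
  interface ReplicaStore {
    void markMissingAndInvalidate(long blockId);
  }

  static void openMeta(long blockId, ReplicaStore store)
      throws FileNotFoundException {
    try {
      // Stand-in for opening the block's metadata file; here we simulate
      // the EMFILE case the patch guards against.
      throw new FileNotFoundException("Too many open files");
    } catch (FileNotFoundException e) {
      // Same guard as the BlockSender hunk above: only report the replica
      // missing and invalidate it when the file is genuinely absent, not
      // when the process has merely run out of file descriptors.
      if (e.getMessage() != null
          && !e.getMessage().contains("Too many open files")) {
        store.markMissingAndInvalidate(blockId);
      }
      throw e; // always propagate to the caller either way
    }
  }

  public static void main(String[] args) {
    try {
      openMeta(42L, id -> {
        throw new AssertionError("block must not be invalidated on EMFILE");
      });
    } catch (FileNotFoundException expected) {
      System.out.println("propagated: " + expected.getMessage());
    }
  }
}

A string match on the message is fragile but is what the patch itself does, since the JDK offers no structured errno on FileNotFoundException; the accompanying test drives the same path through DataNodeFaultInjector rather than real fd exhaustion.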