From a48301791e9564363bc2abad4e89e344b0d7a5ff Mon Sep 17 00:00:00 2001
From: Kihwal Lee <kihwal@apache.org>
Date: Fri, 11 Dec 2015 08:44:47 -0600
Subject: [PATCH] HDFS-9445. Datanode may deadlock while handling a bad volume. Contributed by Walter Su.

---
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt |  3 +
 .../fsdataset/impl/FsDatasetImpl.java       | 87 +++++++++++--------
 .../fsdataset/impl/TestFsDatasetImpl.java   |  4 +
 3 files changed, 58 insertions(+), 36 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 1696053ab9..c1a323b923 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -2574,6 +2574,9 @@ Release 2.7.2 - UNRELEASED
     HDFS-9294. DFSClient deadlock when close file and failed to renew lease.
     (Brahma Reddy Battula via szetszwo)
 
+    HDFS-9445. Datanode may deadlock while handling a bad volume.
+    (Walter Su via Kihwal)
+
 Release 2.7.1 - 2015-07-06
 
   INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
index 1d8c705f7b..afa4dee5d8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
@@ -470,48 +470,67 @@ public void addVolume(final StorageLocation location,
    * Removes a set of volumes from FsDataset.
    * @param volumesToRemove a set of absolute root path of each volume.
    * @param clearFailure set true to clear failure information.
-   *
-   * DataNode should call this function before calling
-   * {@link DataStorage#removeVolumes(java.util.Collection)}.
    */
   @Override
-  public synchronized void removeVolumes(
-      Set<File> volumesToRemove, boolean clearFailure) {
+  public void removeVolumes(Set<File> volumesToRemove, boolean clearFailure) {
     // Make sure that all volumes are absolute path.
     for (File vol : volumesToRemove) {
       Preconditions.checkArgument(vol.isAbsolute(),
          String.format("%s is not absolute path.", vol.getPath()));
     }
 
-    for (int idx = 0; idx < dataStorage.getNumStorageDirs(); idx++) {
-      Storage.StorageDirectory sd = dataStorage.getStorageDir(idx);
-      final File absRoot = sd.getRoot().getAbsoluteFile();
-      if (volumesToRemove.contains(absRoot)) {
-        LOG.info("Removing " + absRoot + " from FsDataset.");
-        // Disable the volume from the service.
-        asyncDiskService.removeVolume(sd.getCurrentDir());
-        volumes.removeVolume(absRoot, clearFailure);
+    Map<String, List<ReplicaInfo>> blkToInvalidate = new HashMap<>();
+    List<String> storageToRemove = new ArrayList<>();
+    synchronized (this) {
+      for (int idx = 0; idx < dataStorage.getNumStorageDirs(); idx++) {
+        Storage.StorageDirectory sd = dataStorage.getStorageDir(idx);
+        final File absRoot = sd.getRoot().getAbsoluteFile();
+        if (volumesToRemove.contains(absRoot)) {
+          LOG.info("Removing " + absRoot + " from FsDataset.");
 
-        // Removed all replica information for the blocks on the volume. Unlike
-        // updating the volumeMap in addVolume(), this operation does not scan
-        // disks.
-        for (String bpid : volumeMap.getBlockPoolList()) {
-          for (Iterator<ReplicaInfo> it = volumeMap.replicas(bpid).iterator();
-              it.hasNext(); ) {
-            ReplicaInfo block = it.next();
-            final File absBasePath =
-                new File(block.getVolume().getBasePath()).getAbsoluteFile();
-            if (absBasePath.equals(absRoot)) {
-              invalidate(bpid, block);
-              it.remove();
+          // Disable the volume from the service.
+          asyncDiskService.removeVolume(sd.getCurrentDir());
+          volumes.removeVolume(absRoot, clearFailure);
+
+          // Removed all replica information for the blocks on the volume.
+          // Unlike updating the volumeMap in addVolume(), this operation does
+          // not scan disks.
+          for (String bpid : volumeMap.getBlockPoolList()) {
+            List<ReplicaInfo> blocks = new ArrayList<>();
+            for (Iterator<ReplicaInfo> it = volumeMap.replicas(bpid).iterator();
+                it.hasNext(); ) {
+              ReplicaInfo block = it.next();
+              final File absBasePath =
+                  new File(block.getVolume().getBasePath()).getAbsoluteFile();
+              if (absBasePath.equals(absRoot)) {
+                blocks.add(block);
+                it.remove();
+              }
             }
+            blkToInvalidate.put(bpid, blocks);
           }
-        }
-        storageMap.remove(sd.getStorageUuid());
+
+          storageToRemove.add(sd.getStorageUuid());
+        }
+      }
+      setupAsyncLazyPersistThreads();
+    }
+
+    // Call this outside the lock.
+    for (Map.Entry<String, List<ReplicaInfo>> entry :
+        blkToInvalidate.entrySet()) {
+      String bpid = entry.getKey();
+      List<ReplicaInfo> blocks = entry.getValue();
+      for (ReplicaInfo block : blocks) {
+        invalidate(bpid, block);
+      }
+    }
+
+    synchronized (this) {
+      for(String storageUuid : storageToRemove) {
+        storageMap.remove(storageUuid);
       }
     }
-    setupAsyncLazyPersistThreads();
   }
 
   private StorageType getStorageTypeFromLocations(
@@ -1931,15 +1950,11 @@ public void invalidate(String bpid, Block invalidBlks[]) throws IOException {
   public void invalidate(String bpid, ReplicaInfo block) {
     // If a DFSClient has the replica in its cache of short-circuit file
     // descriptors (and the client is using ShortCircuitShm), invalidate it.
-    // The short-circuit registry is null in the unit tests, because the
-    // datanode is mock object.
-    if (datanode.getShortCircuitRegistry() != null) {
-      datanode.getShortCircuitRegistry().processBlockInvalidation(
-          new ExtendedBlockId(block.getBlockId(), bpid));
+    datanode.getShortCircuitRegistry().processBlockInvalidation(
+        new ExtendedBlockId(block.getBlockId(), bpid));
 
-      // If the block is cached, start uncaching it.
-      cacheManager.uncacheBlock(bpid, block.getBlockId());
-    }
+    // If the block is cached, start uncaching it.
+    cacheManager.uncacheBlock(bpid, block.getBlockId());
 
     datanode.notifyNamenodeDeletedBlock(new ExtendedBlock(bpid, block),
         block.getStorageUuid());
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
index 62907ec065..a3d5769182 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
@@ -39,6 +39,7 @@
 import org.apache.hadoop.hdfs.server.datanode.FinalizedReplica;
 import org.apache.hadoop.hdfs.server.datanode.ReplicaHandler;
 import org.apache.hadoop.hdfs.server.datanode.ReplicaInfo;
+import org.apache.hadoop.hdfs.server.datanode.ShortCircuitRegistry;
 import org.apache.hadoop.hdfs.server.datanode.StorageLocation;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeReference;
@@ -147,6 +148,9 @@ public void setUp() throws IOException {
     when(datanode.getDnConf()).thenReturn(dnConf);
     final BlockScanner disabledBlockScanner = new BlockScanner(datanode, conf);
     when(datanode.getBlockScanner()).thenReturn(disabledBlockScanner);
+    final ShortCircuitRegistry shortCircuitRegistry =
+        new ShortCircuitRegistry(conf);
+    when(datanode.getShortCircuitRegistry()).thenReturn(shortCircuitRegistry);
     createStorageDirs(storage, conf, NUM_INIT_VOLUMES);
     dataset = new FsDatasetImpl(datanode, storage, conf);
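
The deadlock fix in removeVolumes() above hinges on ordering: collect the affected replicas and storage IDs while holding the dataset lock, call invalidate() for each collected block only after releasing the lock (invalidate() calls into the ShortCircuitRegistry, the CacheManager, and the DataNode, which can take other locks), then reacquire the lock just for the final storageMap cleanup. Below is a minimal, self-contained sketch of that lock-ordering pattern; the class and member names (VolumeRegistry, onInvalidate, replicasByPool) are illustrative only and are not the FsDatasetImpl API.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Sketch of the pattern the patch applies: gather work under the lock, run
// potentially blocking per-block callbacks outside the lock, then reacquire
// the lock for the remaining bookkeeping. Illustrative names only.
public class VolumeRegistry {
  private final Map<String, List<Long>> replicasByPool = new HashMap<>();
  private final List<String> storages = new ArrayList<>();

  public void removeVolume(String storageId) {
    Map<String, List<Long>> toInvalidate = new HashMap<>();
    List<String> storagesToDrop = new ArrayList<>();

    // Phase 1: under the lock, only collect what has to be done.
    synchronized (this) {
      for (Map.Entry<String, List<Long>> e : replicasByPool.entrySet()) {
        toInvalidate.put(e.getKey(), new ArrayList<>(e.getValue()));
        e.getValue().clear();
      }
      storagesToDrop.add(storageId);
    }

    // Phase 2: outside the lock, run callbacks that may block or acquire
    // other locks; doing this while holding the lock is what could deadlock.
    for (Map.Entry<String, List<Long>> e : toInvalidate.entrySet()) {
      for (long blockId : e.getValue()) {
        onInvalidate(e.getKey(), blockId);
      }
    }

    // Phase 3: reacquire the lock for the cheap final cleanup.
    synchronized (this) {
      storages.removeAll(storagesToDrop);
    }
  }

  // Stand-in for FsDatasetImpl#invalidate(bpid, block).
  protected void onInvalidate(String bpid, long blockId) {
    System.out.println("invalidate " + bpid + ":" + blockId);
  }

  public static void main(String[] args) {
    VolumeRegistry registry = new VolumeRegistry();
    registry.replicasByPool.put("BP-1", new ArrayList<>(Arrays.asList(1L, 2L)));
    registry.storages.add("DS-1");
    registry.removeVolume("DS-1");
  }
}

The real change additionally groups the collected blocks per block pool and keeps asyncDiskService/lazy-persist handling inside the first synchronized block, as shown in the hunk above; the second hunk and the test change exist because invalidate() is now reached in unit tests, so the mocked DataNode must return a real ShortCircuitRegistry.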