From 5c1036d598051cf6af595740f1ab82092b0b6554 Mon Sep 17 00:00:00 2001
From: Colin Patrick Mccabe
Date: Tue, 10 Mar 2015 18:20:25 -0700
Subject: [PATCH] HDFS-7830. DataNode does not release the volume lock when
 adding a volume fails. (Lei Xu via Colin P. McCabe)

---
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt   |  3 ++
 .../hadoop/hdfs/server/common/Storage.java    |  2 +-
 .../fsdataset/impl/FsDatasetImpl.java         | 16 ++++++-
 .../datanode/TestDataNodeHotSwapVolumes.java  | 34 ++-------------
 .../fsdataset/impl/FsDatasetTestUtil.java     | 43 +++++++++++++++++++
 .../fsdataset/impl/TestFsDatasetImpl.java     | 43 +++++++++++++++++++
 6 files changed, 108 insertions(+), 33 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index a2e552a6be..0ba34a47bb 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -1133,6 +1133,9 @@ Release 2.7.0 - UNRELEASED
     HDFS-7818. OffsetParam should return the default value instead of throwing
     NPE when the value is unspecified. (Eric Payne via wheat9)
 
+    HDFS-7830. DataNode does not release the volume lock when adding a volume
+    fails. (Lei Xu via Colin P. McCabe)
+
   BREAKDOWN OF HDFS-7584 SUBTASKS AND RELATED JIRAS
 
     HDFS-7720. Quota by Storage Type API, tools and ClientNameNode
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java
index e6bd5b2893..e6f099983d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java
@@ -672,7 +672,7 @@ public boolean isShared() {
    */
   public void lock() throws IOException {
     if (isShared()) {
-      LOG.info("Locking is disabled");
+      LOG.info("Locking is disabled for " + this.root);
       return;
     }
     FileLock newLock = tryLock();
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
index 58f5615f34..0f28aa4e53 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
@@ -46,6 +46,7 @@
 import javax.management.ObjectName;
 import javax.management.StandardMBean;
 
+import com.google.common.annotations.VisibleForTesting;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -375,6 +376,12 @@ private void addVolume(Collection<StorageLocation> dataLocations,
     LOG.info("Added volume - " + dir + ", StorageType: " + storageType);
   }
 
+  @VisibleForTesting
+  public FsVolumeImpl createFsVolume(String storageUuid, File currentDir,
+      StorageType storageType) throws IOException {
+    return new FsVolumeImpl(this, storageUuid, currentDir, conf, storageType);
+  }
+
   @Override
   public void addVolume(final StorageLocation location,
       final List<NamespaceInfo> nsInfos)
@@ -394,8 +401,8 @@ public void addVolume(final StorageLocation location,
     final Storage.StorageDirectory sd = builder.getStorageDirectory();
 
     StorageType storageType = location.getStorageType();
-    final FsVolumeImpl fsVolume = new FsVolumeImpl(
-        this, sd.getStorageUuid(), sd.getCurrentDir(), this.conf, storageType);
+    final FsVolumeImpl fsVolume =
+        createFsVolume(sd.getStorageUuid(), sd.getCurrentDir(), storageType);
     final ReplicaMap tempVolumeMap = new ReplicaMap(fsVolume);
     ArrayList<IOException> exceptions = Lists.newArrayList();
@@ -411,6 +418,11 @@ public void addVolume(final StorageLocation location,
       }
     }
     if (!exceptions.isEmpty()) {
+      try {
+        sd.unlock();
+      } catch (IOException e) {
+        exceptions.add(e);
+      }
       throw MultipleIOException.createIOException(exceptions);
     }
 
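The hunks above carry the substance of the fix: DataStorage#prepareVolume takes the directory's in_use.lock on behalf of the new volume, so when a later step of addVolume fails, that lock must be released before the exception propagates; otherwise the directory can never be re-added without restarting the DataNode. A minimal sketch of the idiom, outside HDFS (AddVolumeSketch, StorageDir, and loadBlockPool are illustrative names, and a plain IOException stands in for MultipleIOException):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

// Illustrative names throughout; only the control flow mirrors
// FsDatasetImpl#addVolume after the patch.
class AddVolumeSketch {
  interface StorageDir {
    void lock() throws IOException;
    void unlock() throws IOException;
  }

  void addVolume(StorageDir sd, List<String> blockPoolIds) throws IOException {
    sd.lock(); // in the patch, DataStorage#prepareVolume takes in_use.lock
    List<IOException> exceptions = new ArrayList<>();
    for (String bpid : blockPoolIds) {
      try {
        loadBlockPool(bpid);
      } catch (IOException e) {
        exceptions.add(e); // collect, keep going, report all at the end
      }
    }
    if (!exceptions.isEmpty()) {
      // The fix: drop the directory lock before propagating the failure,
      // so a later addVolume attempt on this directory can succeed.
      try {
        sd.unlock();
      } catch (IOException e) {
        exceptions.add(e);
      }
      throw new IOException("addVolume failed: " + exceptions);
    }
  }

  private void loadBlockPool(String bpid) throws IOException { /* ... */ }
}

Note that an unlock failure is added to the collected exceptions rather than masking them, which matches how the patch folds the secondary error into the MultipleIOException.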
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeHotSwapVolumes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeHotSwapVolumes.java
index 3d0bccc47a..ac316e864f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeHotSwapVolumes.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeHotSwapVolumes.java
@@ -37,18 +37,14 @@
 import org.apache.hadoop.hdfs.server.common.Storage;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
+import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetTestUtil;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
-import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.junit.After;
 import org.junit.Test;
 
 import java.io.File;
 import java.io.IOException;
-import java.io.RandomAccessFile;
-import java.nio.channels.FileChannel;
-import java.nio.channels.FileLock;
-import java.nio.channels.OverlappingFileLockException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -70,7 +66,6 @@
 import static org.hamcrest.core.Is.is;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertThat;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
@@ -538,31 +533,10 @@ public void testAddVolumeFailures() throws IOException {
   private static void assertFileLocksReleased(Collection<String> dirs)
       throws IOException {
     for (String dir: dirs) {
-      StorageLocation sl = StorageLocation.parse(dir);
-      File lockFile = new File(sl.getFile(), Storage.STORAGE_FILE_LOCK);
-      RandomAccessFile raf = null;
-      FileChannel channel = null;
-      FileLock lock = null;
       try {
-        raf = new RandomAccessFile(lockFile, "rws");
-        channel = raf.getChannel();
-        lock = channel.tryLock();
-        assertNotNull(String.format(
-            "Lock file at %s appears to be held by a different process.",
-            lockFile.getAbsolutePath()), lock);
-      } catch (OverlappingFileLockException e) {
-        fail(String.format("Must release lock file at %s.",
-            lockFile.getAbsolutePath()));
-      } finally {
-        if (lock != null) {
-          try {
-            lock.release();
-          } catch (IOException e) {
-            LOG.warn(String.format("I/O error releasing file lock %s.",
-                lockFile.getAbsolutePath()), e);
-          }
-        }
-        IOUtils.cleanup(null, channel, raf);
+        FsDatasetTestUtil.assertFileLockReleased(dir);
+      } catch (IOException e) {
+        LOG.warn(e);
       }
     }
   }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetTestUtil.java
index f9e30e12a0..7ac9b65db2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetTestUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetTestUtil.java
@@ -19,13 +19,24 @@
 import java.io.File;
 import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.channels.FileChannel;
+import java.nio.channels.FileLock;
+import java.nio.channels.OverlappingFileLockException;
 import java.util.Collection;
+import java.util.Random;
 
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.server.common.Storage;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.datanode.ReplicaInfo;
+import org.apache.hadoop.hdfs.server.datanode.StorageLocation;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
+import org.apache.hadoop.io.IOUtils;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.fail;
 
 public class FsDatasetTestUtil {
@@ -72,4 +83,36 @@ public static void stopLazyWriter(DataNode dn) {
     FsDatasetImpl fsDataset = ((FsDatasetImpl) dn.getFSDataset());
     ((FsDatasetImpl.LazyWriter) fsDataset.lazyWriter.getRunnable()).stop();
   }
+
+  /**
+   * Asserts that the storage lock file in the given directory has been
+   * released. This method works by trying to acquire the lock file itself. If
+   * locking fails here, then the main code must have failed to release it.
+   *
+   * @param dir the storage directory to check
+   * @throws IOException if there is an unexpected I/O error
+   */
+  public static void assertFileLockReleased(String dir) throws IOException {
+    StorageLocation sl = StorageLocation.parse(dir);
+    File lockFile = new File(sl.getFile(), Storage.STORAGE_FILE_LOCK);
+    try (RandomAccessFile raf = new RandomAccessFile(lockFile, "rws");
+        FileChannel channel = raf.getChannel()) {
+      FileLock lock = channel.tryLock();
+      assertNotNull(String.format(
+          "Lock file at %s appears to be held by a different process.",
+          lockFile.getAbsolutePath()), lock);
+      if (lock != null) {
+        try {
+          lock.release();
+        } catch (IOException e) {
+          FsDatasetImpl.LOG.warn(String.format("I/O error releasing file lock %s.",
+              lockFile.getAbsolutePath()), e);
+          throw e;
+        }
+      }
+    } catch (OverlappingFileLockException e) {
+      fail(String.format("Must release lock file at %s.",
+          lockFile.getAbsolutePath()));
+    }
+  }
 }
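The helper added above verifies release by re-acquiring the lock itself: FileChannel.tryLock() returns null when another process holds the lock and throws OverlappingFileLockException when the holder is in the same JVM, so both cases must be treated as "still locked". A self-contained sketch of that probe using only java.nio (isLockReleased is a hypothetical name; the patch's version asserts instead of returning a boolean):

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
import java.nio.channels.FileLock;
import java.nio.channels.OverlappingFileLockException;

final class LockProbe {
  // Returns true if nobody holds the lock file; isLockReleased is a
  // hypothetical helper, not part of the patch.
  static boolean isLockReleased(File lockFile) throws IOException {
    try (RandomAccessFile raf = new RandomAccessFile(lockFile, "rws");
         FileChannel channel = raf.getChannel()) {
      FileLock lock = channel.tryLock();
      if (lock == null) {
        return false; // held by another process
      }
      lock.release(); // it was free; give it back immediately
      return true;
    } catch (OverlappingFileLockException e) {
      return false; // held by another thread in this JVM
    }
  }
}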
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
index 8c28033b9e..3b47dd095d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
@@ -35,11 +35,13 @@
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeReference;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.RoundRobinVolumeChoosingPolicy;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
+import org.apache.hadoop.io.MultipleIOException;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.util.DiskChecker;
 import org.apache.hadoop.util.StringUtils;
 import org.junit.Before;
 import org.junit.Test;
+import org.mockito.Matchers;
 import org.mockito.invocation.InvocationOnMock;
 import org.mockito.stubbing.Answer;
 
@@ -57,12 +59,17 @@
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyList;
 import static org.mockito.Matchers.anyListOf;
+import static org.mockito.Matchers.anyString;
 import static org.mockito.Matchers.eq;
 import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.doReturn;
 import static org.mockito.Mockito.doThrow;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.spy;
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
 
@@ -291,4 +298,40 @@ public Object answer(InvocationOnMock invocationOnMock)
     assertFalse(volumeList.getVolumes().contains(brokenVolume));
     assertEquals(NUM_VOLUMES - 1, volumeList.getVolumes().size());
   }
+
+  @Test
+  public void testAddVolumeFailureReleasesInUseLock() throws IOException {
+    FsDatasetImpl spyDataset = spy(dataset);
+    FsVolumeImpl mockVolume = mock(FsVolumeImpl.class);
+    File badDir = new File(BASE_DIR, "bad");
+    badDir.mkdirs();
+    doReturn(mockVolume).when(spyDataset)
+        .createFsVolume(anyString(), any(File.class), any(StorageType.class));
+    doThrow(new IOException("Failed to getVolumeMap()"))
+        .when(mockVolume).getVolumeMap(
+            anyString(),
+            any(ReplicaMap.class),
+            any(RamDiskReplicaLruTracker.class));
+
+    Storage.StorageDirectory sd = createStorageDirectory(badDir);
+    sd.lock();
+    DataStorage.VolumeBuilder builder = new DataStorage.VolumeBuilder(storage, sd);
+    when(storage.prepareVolume(eq(datanode), eq(badDir),
+        Matchers.<List<NamespaceInfo>>any()))
+        .thenReturn(builder);
+
+    StorageLocation location = StorageLocation.parse(badDir.toString());
+    List<NamespaceInfo> nsInfos = Lists.newArrayList();
+    for (String bpid : BLOCK_POOL_IDS) {
+      nsInfos.add(new NamespaceInfo(0, CLUSTER_ID, bpid, 1));
+    }
+
+    try {
+      spyDataset.addVolume(location, nsInfos);
+      fail("Expect to throw MultipleIOException");
+    } catch (MultipleIOException e) {
+    }
+
+    FsDatasetTestUtil.assertFileLockReleased(badDir.toString());
+  }
 }
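The test above works because createFsVolume was factored out as a @VisibleForTesting seam: a Mockito spy overrides just that one factory method and hands back a volume whose getVolumeMap throws, which drives addVolume down the failure path while the storage directory lock is held. A reduced sketch of the seam-plus-spy pattern (Dataset and Volume are illustrative stand-ins, not HDFS classes):

import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;

import java.io.IOException;

// Illustrative stand-ins: Dataset mirrors FsDatasetImpl's shape and Volume
// mirrors FsVolumeImpl; neither is HDFS code.
class Dataset {
  static class Volume {
    void load() throws IOException { /* real work */ }
  }

  // The seam: tests override this factory instead of the constructor call.
  Volume createVolume() throws IOException {
    return new Volume();
  }

  void addVolume() throws IOException {
    Volume v = createVolume();
    v.load(); // analogous to getVolumeMap() in the patch
  }
}

class SeamSpyExample {
  static void demo() throws IOException {
    Dataset spyDataset = spy(new Dataset());
    Dataset.Volume badVolume = mock(Dataset.Volume.class);
    doThrow(new IOException("injected failure")).when(badVolume).load();
    doReturn(badVolume).when(spyDataset).createVolume();

    try {
      spyDataset.addVolume(); // exercises the failure path
    } catch (IOException expected) {
      // assert cleanup here, e.g. with a lock probe like the one above
    }
  }
}

Routing construction through an overridable factory keeps the production path unchanged while letting tests inject a failure at exactly the point where the lock is held, which is what makes the regression test for the missing unlock possible.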