diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-2802.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-2802.txt
index 4acbd45d9c..899aead0d9 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-2802.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-2802.txt
@@ -56,3 +56,6 @@ Branch-2802 Snapshot (Unreleased)
 
   HDFS-4147. When there is a snapshot in a subtree, deletion of the subtree
   should fail. (Jing Zhao via szetszwo)
+
+  HDFS-4150. Update the inode in the block map when a snapshotted file or a
+  snapshot file is deleted. (Jing Zhao via szetszwo)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 9f7b1fe96d..9f25ab92e2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -145,6 +145,7 @@
 import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
@@ -159,6 +160,7 @@
 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirType;
 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
 import org.apache.hadoop.hdfs.server.common.Util;
+import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapINodeUpdateEntry;
 import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
 import org.apache.hadoop.hdfs.server.namenode.INodeDirectory.INodesInPath;
 import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
@@ -2718,21 +2720,48 @@ private boolean deleteInternal(String src, boolean recursive,
    * of blocks that need to be removed from blocksMap
    */
   private void removeBlocks(BlocksMapUpdateInfo blocks) {
-    int start = 0;
-    int end = 0;
-    List<Block> toDeleteList = blocks.getToDeleteList();
-    while (start < toDeleteList.size()) {
-      end = BLOCK_DELETION_INCREMENT + start;
-      end = end > toDeleteList.size() ? toDeleteList.size() : end;
+    Iterator<Map.Entry<Block, BlocksMapINodeUpdateEntry>> iter = blocks
+        .iterator();
+    while (iter.hasNext()) {
       writeLock();
       try {
-        for (int i = start; i < end; i++) {
-          blockManager.removeBlock(toDeleteList.get(i));
+        for (int numberToHandle = BLOCK_DELETION_INCREMENT; iter.hasNext()
+            && numberToHandle > 0; numberToHandle--) {
+          Map.Entry<Block, BlocksMapINodeUpdateEntry> entry = iter.next();
+          updateBlocksMap(entry);
         }
       } finally {
         writeUnlock();
       }
-      start = end;
+    }
+  }
+
+  /**
+   * Update the blocksMap for a given block.
+   * 
+   * @param entry
+   *          The update entry containing both the block and its new INode. The
+   *          block should be removed from the blocksMap if the INode is null,
+   *          otherwise the INode for the block will be updated in the
+   *          blocksMap.
+   */
+  private void updateBlocksMap(
+      Map.Entry<Block, BlocksMapINodeUpdateEntry> entry) {
+    Block block = entry.getKey();
+    BlocksMapINodeUpdateEntry value = entry.getValue();
+    if (value == null) {
+      blockManager.removeBlock(block);
+    } else {
+      BlockCollection toDelete = value.getToDelete();
+      BlockInfo originalBlockInfo = blockManager.getStoredBlock(block);
+      // The FSDirectory tree and the blocksMap share the same INode reference.
+      // Thus we use "==" to check if the INode for the block belongs to the
+      // current file (instead of the INode from a snapshot file).
+      if (originalBlockInfo != null
+          && toDelete == originalBlockInfo.getBlockCollection()) {
+        blockManager.addBlockCollection(originalBlockInfo,
+            value.getToReplace());
+      }
     }
   }
 
@@ -2754,7 +2783,11 @@ void removePathAndBlocks(String src, BlocksMapUpdateInfo blocks) {
     boolean trackBlockCounts = isSafeModeTrackingBlocks();
     int numRemovedComplete = 0, numRemovedSafe = 0;
 
-    for (Block b : blocks.getToDeleteList()) {
+    Iterator<Map.Entry<Block, BlocksMapINodeUpdateEntry>> blockIter =
+        blocks.iterator();
+    while (blockIter.hasNext()) {
+      Map.Entry<Block, BlocksMapINodeUpdateEntry> entry = blockIter.next();
+      Block b = entry.getKey();
       if (trackBlockCounts) {
         BlockInfo bi = blockManager.getStoredBlock(b);
         if (bi.isComplete()) {
@@ -2764,8 +2797,9 @@ void removePathAndBlocks(String src, BlocksMapUpdateInfo blocks) {
           }
         }
       }
-      blockManager.removeBlock(b);
+      updateBlocksMap(entry);
     }
+
     if (trackBlockCounts) {
       if (LOG.isDebugEnabled()) {
         LOG.debug("Adjusting safe-mode totals for deletion of " + src + ":" +
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java
index e836b1e299..c9588b9c54 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java
@@ -22,7 +22,10 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.fs.ContentSummary;
@@ -31,6 +34,7 @@
 import org.apache.hadoop.fs.permission.PermissionStatus;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.protocol.Block;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
 import org.apache.hadoop.util.StringUtils;
 
@@ -194,7 +198,8 @@ public boolean isDirectory() {
    * 
    * @param info
    *          Containing all the blocks collected from the children of this
-   *          INode. These blocks later should be removed from the blocksMap.
+   *          INode. These blocks later should be removed/updated in the
+   *          blocksMap.
    */
   abstract int collectSubtreeBlocksAndClear(BlocksMapUpdateInfo info);
 
@@ -501,44 +506,84 @@ public void dumpTreeRecursively(PrintWriter out, StringBuilder prefix) {
   /**
    * Information used for updating the blocksMap when deleting files.
    */
-  public static class BlocksMapUpdateInfo {
-    /**
-     * The list of blocks that need to be removed from blocksMap
-     */
-    private List<Block> toDeleteList;
-
-    public BlocksMapUpdateInfo(List<Block> toDeleteList) {
-      this.toDeleteList = toDeleteList == null ? new ArrayList<Block>()
-          : toDeleteList;
-    }
+  public static class BlocksMapUpdateInfo implements
+      Iterable<Map.Entry<Block, BlocksMapINodeUpdateEntry>> {
+    private final Map<Block, BlocksMapINodeUpdateEntry> updateMap;
 
     public BlocksMapUpdateInfo() {
-      toDeleteList = new ArrayList<Block>();
+      updateMap = new HashMap<Block, BlocksMapINodeUpdateEntry>();
     }
 
     /**
-     * @return The list of blocks that need to be removed from blocksMap
-     */
-    public List<Block> getToDeleteList() {
-      return toDeleteList;
-    }
-
-    /**
-     * Add a to-be-deleted block into the
-     * {@link BlocksMapUpdateInfo#toDeleteList}
+     * Add a to-be-deleted block. This block should belong to a file without
+     * snapshots. We thus only need to put a block-null pair into the updateMap.
+     * 
     * @param toDelete the to-be-deleted block
      */
     public void addDeleteBlock(Block toDelete) {
       if (toDelete != null) {
-        toDeleteList.add(toDelete);
+        updateMap.put(toDelete, null);
       }
     }
 
     /**
-     * Clear {@link BlocksMapUpdateInfo#toDeleteList}
+     * Add a given block, as well as its old and new BlockCollection
+     * information, into the updateMap.
+     * 
+     * @param toUpdateBlock
+     *          The given block
+     * @param entry
+     *          The BlocksMapINodeUpdateEntry instance containing both the
+     *          original BlockCollection of the given block and the new
+     *          BlockCollection of the given block for updating the blocksMap.
+     *          The new BlockCollection should be the INode of one of the
+     *          corresponding file's snapshots.
+     */
+    public void addUpdateBlock(Block toUpdateBlock,
+        BlocksMapINodeUpdateEntry entry) {
+      updateMap.put(toUpdateBlock, entry);
+    }
+
+    /**
+     * Clear {@link BlocksMapUpdateInfo#updateMap}
      */
     public void clear() {
-      toDeleteList.clear();
+      updateMap.clear();
+    }
+
+    @Override
+    public Iterator<Map.Entry<Block, BlocksMapINodeUpdateEntry>> iterator() {
+      return updateMap.entrySet().iterator();
+    }
+  }
+
+  /**
+   * When deleting a file with snapshots, we cannot directly remove its record
+   * from blocksMap. Instead, we should consider replacing the original record
+   * in blocksMap with the INode of a snapshot.
+   */
+  public static class BlocksMapINodeUpdateEntry {
+    /**
+     * The BlockCollection of the file to be deleted
+     */
+    private final BlockCollection toDelete;
+    /**
+     * The BlockCollection of the to-be-deleted file's snapshot
+     */
+    private final BlockCollection toReplace;
+
+    public BlocksMapINodeUpdateEntry(BlockCollection toDelete,
+        BlockCollection toReplace) {
+      this.toDelete = toDelete;
+      this.toReplace = toReplace;
+    }
+
+    public BlockCollection getToDelete() {
+      return toDelete;
+    }
+
+    public BlockCollection getToReplace() {
+      return toReplace;
     }
   }
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java
index edd85a96fc..6f9aa64f37 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java
@@ -91,7 +91,7 @@ public short getBlockReplication() {
     return getFileReplication();
   }
 
-  void setFileReplication(short replication) {
+  protected void setFileReplication(short replication) {
     if(replication <= 0)
        throw new IllegalArgumentException("Unexpected value for the replication");
     header = ((long)replication << BLOCKBITS) | (header & ~HEADERMASK);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/INodeFileWithLink.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/INodeFileWithLink.java
index 440d27e04f..b67808ebdc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/INodeFileWithLink.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/INodeFileWithLink.java
@@ -17,10 +17,7 @@
  */
 package org.apache.hadoop.hdfs.server.namenode.snapshot;
 
-import java.util.List;
-
 import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
 import org.apache.hadoop.hdfs.server.namenode.INodeFile;
 
@@ -61,7 +58,8 @@ public void insert(INodeFileWithLink inode) {
   @Override
   public short getBlockReplication() {
     short max = getFileReplication();
-    for(INodeFileWithLink i = next; i != this; i = i.getNext()) {
+    // i may be null since next will be set to null when the INode is deleted
+    for(INodeFileWithLink i = next; i != this && i != null; i = i.getNext()) {
       final short replication = i.getFileReplication();
       if (replication > max) {
         max = replication;
@@ -78,35 +76,46 @@ public short getBlockReplication() {
    * any other inode, collect them and update the block list.
    */
   @Override
-  protected int collectSubtreeBlocksAndClear(List<Block> v) {
+  protected int collectSubtreeBlocksAndClear(BlocksMapUpdateInfo info) {
     if (next == this) {
-      //this is the only remaining inode.
-      super.collectSubtreeBlocksAndClear(v);
+      // this is the only remaining inode.
+      super.collectSubtreeBlocksAndClear(info);
     } else {
-      //There are other inode(s) using the blocks.
-      //Compute max file size excluding this and find the last inode.
+      // There are other inode(s) using the blocks.
+      // Compute max file size excluding this and find the last inode.
       long max = next.computeFileSize(true);
+      short maxReplication = next.getFileReplication();
       INodeFileWithLink last = next;
       for(INodeFileWithLink i = next.getNext(); i != this; i = i.getNext()) {
         final long size = i.computeFileSize(true);
         if (size > max) {
           max = size;
         }
+        final short rep = i.getFileReplication();
+        if (rep > maxReplication) {
+          maxReplication = rep;
+        }
         last = i;
       }
-      collectBlocksBeyondMaxAndClear(max, v);
+      collectBlocksBeyondMaxAndClear(max, info);
 
-      //remove this from the circular linked list.
+      // remove this from the circular linked list.
       last.next = this.next;
+      // Set the replication of the current INode to the max of all the other
+      // linked INodes, so that in case the current INode is retrieved from the
+      // blocksMap before it is removed or updated, the correct replication
+      // number can be retrieved.
+      this.setFileReplication(maxReplication);
       this.next = null;
-      //clear parent
+      // clear parent
       parent = null;
     }
     return 1;
   }
 
-  private void collectBlocksBeyondMaxAndClear(final long max, final List<Block> v) {
+  private void collectBlocksBeyondMaxAndClear(final long max,
+      final BlocksMapUpdateInfo info) {
     final BlockInfo[] oldBlocks = getBlocks();
     if (oldBlocks != null) {
       //find the minimum n such that the size of the first n blocks > max
@@ -115,9 +124,18 @@ private void collectBlocksBeyondMaxAndClear(final long max, final List<Block> v)
         size += oldBlocks[n].getNumBytes();
       }
 
-      //starting from block n, the data is beyond max.
+      // Replace the INode for all the remaining blocks in blocksMap
+      BlocksMapINodeUpdateEntry entry = new BlocksMapINodeUpdateEntry(this,
+          next);
+      if (info != null) {
+        for (int i = 0; i < n; i++) {
+          info.addUpdateBlock(oldBlocks[i], entry);
+        }
+      }
+
+      // starting from block n, the data is beyond max.
       if (n < oldBlocks.length) {
-        //resize the array.
+        // resize the array.
         final BlockInfo[] newBlocks;
         if (n == 0) {
           newBlocks = null;
@@ -129,10 +147,10 @@ private void collectBlocksBeyondMaxAndClear(final long max, final List<Block> v)
           i.setBlocks(newBlocks);
         }
 
-        //collect the blocks beyond max.
-        if (v != null) {
+        // collect the blocks beyond max.
+        if (info != null) {
           for(; n < oldBlocks.length; n++) {
-            v.add(oldBlocks[n]);
+            info.addDeleteBlock(oldBlocks[n]);
           }
         }
       }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotBlocksMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotBlocksMap.java
new file mode 100644
index 0000000000..2966054a44
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotBlocksMap.java
@@ -0,0 +1,158 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.snapshot;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
+import org.apache.hadoop.hdfs.server.namenode.FSDirectory;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.hdfs.server.namenode.INodeFile;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for snapshot-related information in blocksMap.
+ */
+public class TestSnapshotBlocksMap {
+
+  private static final long seed = 0;
+  private static final short REPLICATION = 3;
+  private static final int BLOCKSIZE = 1024;
+
+  private final Path dir = new Path("/TestSnapshot");
+  private final Path sub1 = new Path(dir, "sub1");
+
+  protected Configuration conf;
+  protected MiniDFSCluster cluster;
+  protected FSNamesystem fsn;
+  protected DistributedFileSystem hdfs;
+
+  @Before
+  public void setUp() throws Exception {
+    conf = new Configuration();
+    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCKSIZE);
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION)
+        .build();
+    cluster.waitActive();
+
+    fsn = cluster.getNamesystem();
+    hdfs = cluster.getFileSystem();
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+
+  /**
+   * Test deleting a file with snapshots. Need to check the blocksMap to make
+   * sure the corresponding record is updated correctly.
+   */
+  @Test
+  public void testDeletionWithSnapshots() throws Exception {
+    Path file0 = new Path(sub1, "file0");
+    Path file1 = new Path(sub1, "file1");
+
+    Path subsub1 = new Path(sub1, "sub1");
+    Path subfile0 = new Path(subsub1, "file0");
+
+    // Create files under sub1
+    DFSTestUtil.createFile(hdfs, file0, BLOCKSIZE, REPLICATION, seed);
+    DFSTestUtil.createFile(hdfs, file1, BLOCKSIZE, REPLICATION, seed);
+    DFSTestUtil.createFile(hdfs, subfile0, BLOCKSIZE, REPLICATION, seed);
+
+    BlockManager bm = fsn.getBlockManager();
+    FSDirectory dir = fsn.getFSDirectory();
+
+    INodeFile inodeForDeletedFile = INodeFile.valueOf(
+        dir.getINode(subfile0.toString()), subfile0.toString());
+    BlockInfo[] blocksForDeletedFile = inodeForDeletedFile.getBlocks();
+    assertEquals(blocksForDeletedFile.length, 1);
+    BlockCollection bcForDeletedFile = bm
+        .getBlockCollection(blocksForDeletedFile[0]);
+    assertNotNull(bcForDeletedFile);
+    // Normal deletion
+    hdfs.delete(subsub1, true);
+    bcForDeletedFile = bm.getBlockCollection(blocksForDeletedFile[0]);
+    // The INode should have been removed from the blocksMap
+    assertNull(bcForDeletedFile);
+
+    // Create snapshots for sub1
+    for (int i = 0; i < 2; i++) {
+      SnapshotTestHelper.createSnapshot(hdfs, sub1, "s" + i);
+    }
+
+    // Check the block information for file0
+    // Get the INode for file0
+    INodeFile inode = INodeFile.valueOf(dir.getINode(file0.toString()),
+        file0.toString());
+    BlockInfo[] blocks = inode.getBlocks();
+    // Get the INode for the first block from blocksMap
+    BlockCollection bc = bm.getBlockCollection(blocks[0]);
+    // The two INodes should be the same one
+    assertTrue(bc == inode);
+
+    // Also check the block information for the snapshot of file0
+    Path snapshotFile0 = SnapshotTestHelper.getSnapshotPath(sub1, "s0",
+        file0.getName());
+    INodeFile ssINode0 = INodeFile.valueOf(dir.getINode(snapshotFile0.toString()),
+        snapshotFile0.toString());
+    BlockInfo[] ssBlocks = ssINode0.getBlocks();
+    // The snapshot of file0 should contain 1 block
+    assertEquals(ssBlocks.length, 1);
+
+    // Delete file0
+    hdfs.delete(file0, true);
+    // Make sure the first block of file0 is still in blocksMap
+    BlockInfo blockInfoAfterDeletion = bm.getStoredBlock(blocks[0]);
+    assertNotNull(blockInfoAfterDeletion);
+    // Check the INode information
+    BlockCollection bcAfterDeletion = blockInfoAfterDeletion
+        .getBlockCollection();
+    // The INode in the blocksMap should no longer be the original INode for
+    // file0
+    assertFalse(bcAfterDeletion == inode);
+
+    // Compare the INode in the blocksMap with INodes for snapshots
+    Path snapshot1File0 = SnapshotTestHelper.getSnapshotPath(sub1, "s1",
+        file0.getName());
+    INodeFile ssINode1 = INodeFile.valueOf(
+        dir.getINode(snapshot1File0.toString()), snapshot1File0.toString());
+    assertTrue(bcAfterDeletion == ssINode0 || bcAfterDeletion == ssINode1);
+    assertEquals(bcAfterDeletion.getBlocks().length, 1);
+  }
+
+  // TODO: test deletion of a file which was appended after taking snapshots
+
+}