HDFS-4675. Fix rename across snapshottable directories. Contributed by Jing Zhao

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-2802@1467540 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tsz-wo Sze 2013-04-13 02:48:34 +00:00
parent 4c00514ede
commit 9c6a7bebe2
20 changed files with 1304 additions and 133 deletions

View File

@ -231,3 +231,6 @@ Branch-2802 Snapshot (Unreleased)
HDFS-4684. Use INode id for image serialization when writing INodeReference.
(szetszwo)
HDFS-4675. Fix rename across snapshottable directories. (Jing Zhao via
szetszwo)

View File

@ -52,7 +52,6 @@
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.protocol.NSQuotaExceededException;
import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
@ -439,7 +438,7 @@ void unprotectedRemoveBlock(String path, INodeFileUnderConstruction fileNode,
@Deprecated
boolean renameTo(String src, String dst)
throws QuotaExceededException, UnresolvedLinkException,
FileAlreadyExistsException, SnapshotAccessControlException {
FileAlreadyExistsException, SnapshotAccessControlException, IOException {
if (NameNode.stateChangeLog.isDebugEnabled()) {
NameNode.stateChangeLog.debug("DIR* FSDirectory.renameTo: "
+src+" to "+dst);
@ -495,7 +494,7 @@ void renameTo(String src, String dst, Options.Rename... options)
@Deprecated
boolean unprotectedRenameTo(String src, String dst, long timestamp)
throws QuotaExceededException, UnresolvedLinkException,
FileAlreadyExistsException, SnapshotAccessControlException {
FileAlreadyExistsException, SnapshotAccessControlException, IOException {
assert hasWriteLock();
INodesInPath srcIIP = rootDir.getINodesInPath4Write(src, false);
final INode srcInode = srcIIP.getLastINode();
@ -512,6 +511,13 @@ boolean unprotectedRenameTo(String src, String dst, long timestamp)
+"failed to rename "+src+" to "+dst+ " because source is the root");
return false;
}
// srcInode and its subtree cannot contain snapshottable directories with
// snapshots
List<INodeDirectorySnapshottable> snapshottableDirs =
new ArrayList<INodeDirectorySnapshottable>();
checkSnapshot(srcInode, snapshottableDirs);
if (isDir(dst)) {
dst += Path.SEPARATOR + new Path(src).getName();
}
@ -536,7 +542,7 @@ boolean unprotectedRenameTo(String src, String dst, long timestamp)
}
byte[][] dstComponents = INode.getPathComponents(dst);
final INodesInPath dstIIP = getExistingPathINodes(dstComponents);
INodesInPath dstIIP = getExistingPathINodes(dstComponents);
if (dstIIP.isSnapshot()) {
throw new SnapshotAccessControlException(
"Modification on RO snapshot is disallowed");
@ -547,7 +553,7 @@ boolean unprotectedRenameTo(String src, String dst, long timestamp)
" because destination exists");
return false;
}
final INode dstParent = dstIIP.getINode(-2);
INode dstParent = dstIIP.getINode(-2);
if (dstParent == null) {
NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
+"failed to rename "+src+" to "+dst+
@ -565,6 +571,14 @@ boolean unprotectedRenameTo(String src, String dst, long timestamp)
srcIIP.getLatestSnapshot());
final boolean srcChildIsReference = srcChild.isReference();
// Record the snapshot on srcChild. After the rename, before any new
// snapshot is taken on the dst tree, changes will be recorded in the latest
// snapshot of the src tree.
if (isSrcInSnapshot) {
srcChild = srcChild.recordModification(srcIIP.getLatestSnapshot());
srcIIP.setLastINode(srcChild);
}
// check srcChild for reference
final INodeReference.WithCount withCount;
if (srcChildIsReference || isSrcInSnapshot) {
@ -587,6 +601,15 @@ boolean unprotectedRenameTo(String src, String dst, long timestamp)
return false;
}
// add src to the destination
if (dstParent.getParent() == null) {
// src and dst file/dir are in the same directory, and the dstParent has
// been replaced when we removed the src. Refresh the dstIIP and
// dstParent.
dstIIP = getExistingPathINodes(dstComponents);
dstParent = dstIIP.getINode(-2);
}
srcChild = srcIIP.getLastINode();
final byte[] dstChildName = dstIIP.getLastLocalName();
final INode toDst;
@ -595,13 +618,15 @@ boolean unprotectedRenameTo(String src, String dst, long timestamp)
toDst = srcChild;
} else {
withCount.getReferredINode().setLocalName(dstChildName);
final INodeReference ref = new INodeReference(dstIIP.getINode(-2), withCount);
Snapshot dstSnapshot = dstIIP.getLatestSnapshot();
final INodeReference.DstReference ref = new INodeReference.DstReference(
dstParent.asDirectory(), withCount,
dstSnapshot == null ? Snapshot.INVALID_ID : dstSnapshot.getId());
withCount.setParentReference(ref);
withCount.incrementReferenceCount();
toDst = ref;
}
// add src to the destination
added = addLastINodeNoQuotaCheck(dstIIP, toDst);
if (added) {
if (NameNode.stateChangeLog.isDebugEnabled()) {
@ -676,7 +701,10 @@ boolean unprotectedRenameTo(String src, String dst, long timestamp,
+ error);
throw new IOException(error);
}
// srcInode and its subtree cannot contain snapshottable directories with
// snapshots
checkSnapshot(srcInode, null);
// validate the destination
if (dst.equals(src)) {
throw new FileAlreadyExistsException(
@ -696,17 +724,17 @@ boolean unprotectedRenameTo(String src, String dst, long timestamp,
+ error);
throw new IOException(error);
}
final INodesInPath dstIIP = rootDir.getINodesInPath4Write(dst, false);
INodesInPath dstIIP = rootDir.getINodesInPath4Write(dst, false);
if (dstIIP.getINodes().length == 1) {
error = "rename destination cannot be the root";
NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
+ error);
throw new IOException(error);
}
List<INodeDirectorySnapshottable> snapshottableDirs =
new ArrayList<INodeDirectorySnapshottable>();
final INode dstInode = dstIIP.getLastINode();
List<INodeDirectorySnapshottable> snapshottableDirs =
new ArrayList<INodeDirectorySnapshottable>();
if (dstInode != null) { // Destination exists
// It's OK to rename a file to a symlink and vice versa
if (dstInode.isDirectory() != srcInode.isDirectory()) {
@ -732,16 +760,7 @@ boolean unprotectedRenameTo(String src, String dst, long timestamp,
throw new IOException(error);
}
}
INode snapshotNode = hasSnapshot(dstInode, snapshottableDirs);
if (snapshotNode != null) {
error = "The direcotry " + dstInode.getFullPathName()
+ " cannot be deleted for renaming since "
+ snapshotNode.getFullPathName()
+ " is snapshottable and already has snapshots";
NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
+ error);
throw new IOException(error);
}
checkSnapshot(dstInode, snapshottableDirs);
}
INode dstParent = dstIIP.getINode(-2);
@ -767,6 +786,14 @@ boolean unprotectedRenameTo(String src, String dst, long timestamp,
srcIIP.getLatestSnapshot());
final boolean srcChildIsReference = srcChild.isReference();
// Record the snapshot on srcChild. After the rename, before any new
// snapshot is taken on the dst tree, changes will be recorded in the latest
// snapshot of the src tree.
if (isSrcInSnapshot) {
srcChild = srcChild.recordModification(srcIIP.getLatestSnapshot());
srcIIP.setLastINode(srcChild);
}
// check srcChild for reference
final INodeReference.WithCount withCount;
if (srcChildIsReference || isSrcInSnapshot) {
@ -789,6 +816,13 @@ boolean unprotectedRenameTo(String src, String dst, long timestamp,
throw new IOException(error);
}
if (dstParent.getParent() == null) {
// src and dst file/dir are in the same directory, and the dstParent has
// been replaced when we removed the src. Refresh the dstIIP and
// dstParent.
dstIIP = rootDir.getINodesInPath4Write(dst, false);
}
boolean undoRemoveDst = false;
INode removedDst = null;
try {
@ -808,7 +842,10 @@ boolean unprotectedRenameTo(String src, String dst, long timestamp,
toDst = srcChild;
} else {
withCount.getReferredINode().setLocalName(dstChildName);
final INodeReference ref = new INodeReference(dstIIP.getINode(-2), withCount);
Snapshot dstSnapshot = dstIIP.getLatestSnapshot();
final INodeReference.DstReference ref = new INodeReference.DstReference(
dstIIP.getINode(-2).asDirectory(), withCount,
dstSnapshot == null ? Snapshot.INVALID_ID : dstSnapshot.getId());
withCount.setParentReference(ref);
withCount.incrementReferenceCount();
toDst = ref;
@ -1106,12 +1143,7 @@ boolean delete(String src, BlocksMapUpdateInfo collectedBlocks)
final INode targetNode = inodesInPath.getLastINode();
List<INodeDirectorySnapshottable> snapshottableDirs =
new ArrayList<INodeDirectorySnapshottable>();
INode snapshotNode = hasSnapshot(targetNode, snapshottableDirs);
if (snapshotNode != null) {
throw new IOException("The direcotry " + targetNode.getFullPathName()
+ " cannot be deleted since " + snapshotNode.getFullPathName()
+ " is snapshottable and already has snapshots");
}
checkSnapshot(targetNode, snapshottableDirs);
filesRemoved = unprotectedDelete(inodesInPath, collectedBlocks, now);
if (snapshottableDirs.size() > 0) {
// There are some snapshottable directories without snapshots to be
@ -1251,34 +1283,31 @@ long unprotectedDelete(INodesInPath iip, BlocksMapUpdateInfo collectedBlocks,
* Check if the given INode (or one of its descendants) is snapshottable and
* already has snapshots.
*
* @param target
* The given INode
* @param snapshottableDirs
* The list of directories that are snapshottable but do not have
* snapshots yet
* @return The INode which is snapshottable and already has snapshots.
* @param target The given INode
* @param snapshottableDirs The list of directories that are snapshottable
* but do not have snapshots yet
*/
private static INode hasSnapshot(INode target,
List<INodeDirectorySnapshottable> snapshottableDirs) {
private static void checkSnapshot(INode target,
List<INodeDirectorySnapshottable> snapshottableDirs) throws IOException {
if (target.isDirectory()) {
INodeDirectory targetDir = target.asDirectory();
if (targetDir.isSnapshottable()) {
INodeDirectorySnapshottable ssTargetDir =
(INodeDirectorySnapshottable) targetDir;
if (ssTargetDir.getNumSnapshots() > 0) {
return target;
throw new IOException("The direcotry " + ssTargetDir.getFullPathName()
+ " cannot be deleted since " + ssTargetDir.getFullPathName()
+ " is snapshottable and already has snapshots");
} else {
snapshottableDirs.add(ssTargetDir);
if (snapshottableDirs != null) {
snapshottableDirs.add(ssTargetDir);
}
}
}
for (INode child : targetDir.getChildrenList(null)) {
INode snapshotDir = hasSnapshot(child, snapshottableDirs);
if (snapshotDir != null) {
return snapshotDir;
}
checkSnapshot(child, snapshottableDirs);
}
}
return null;
}
/**
@ -2018,9 +2047,9 @@ private boolean addLastINodeNoQuotaCheck(INodesInPath inodesInPath, INode i) {
* Remove the last inode in the path from the namespace.
* Count of each ancestor with quota is also updated.
* @return -1 for failing to remove;
* 0 for removing a reference;
* 1 for removing a non-reference inode.
* @throws NSQuotaExceededException
* 0 for removing a reference whose referred inode has other
* reference nodes;
* >0 otherwise.
*/
private long removeLastINode(final INodesInPath iip)
throws QuotaExceededException {

View File

@ -426,6 +426,12 @@ private void loadDirectoryWithSnapshot(DataInput in)
final INodeDirectory parent = INodeDirectory.valueOf(
namesystem.dir.rootDir.getNode(parentPath, false), parentPath);
// Check if the whole subtree has been saved (for reference nodes)
boolean toLoadSubtree = referenceMap.toProcessSubtree(parent.getId());
if (!toLoadSubtree) {
return;
}
// Step 2. Load snapshots if parent is snapshottable
int numSnapshots = in.readInt();
if (numSnapshots >= 0) {
@ -650,16 +656,20 @@ INode loadINode(final byte[] localName, boolean isSnapshotINode,
modificationTime, atime, symlink);
} else if (numBlocks == -3) {
//reference
final boolean isWithName = in.readBoolean();
int dstSnapshotId = Snapshot.INVALID_ID;
if (!isWithName) {
dstSnapshotId = in.readInt();
}
final INodeReference.WithCount withCount
= referenceMap.loadINodeReferenceWithCount(isSnapshotINode, in, this);
if (isWithName) {
return new INodeReference.WithName(null, withCount, localName);
} else {
final INodeReference ref = new INodeReference(null, withCount);
final INodeReference ref = new INodeReference.DstReference(null,
withCount, dstSnapshotId);
withCount.setParentReference(ref);
return ref;
}
@ -830,9 +840,10 @@ void save(File newFile, FSImageCompression compression) throws IOException {
byte[] byteStore = new byte[4*HdfsConstants.MAX_PATH_LENGTH];
ByteBuffer strbuf = ByteBuffer.wrap(byteStore);
// save the root
FSImageSerialization.saveINode2Image(fsDir.rootDir, out, false, referenceMap);
FSImageSerialization.saveINode2Image(fsDir.rootDir, out, false,
referenceMap);
// save the rest of the nodes
saveImage(strbuf, fsDir.rootDir, out, null);
saveImage(strbuf, fsDir.rootDir, out, null, true);
// save files under construction
sourceNamesystem.saveFilesUnderConstruction(out);
context.checkCancelled();
@ -918,19 +929,13 @@ private String computeSnapshotPath(String nonSnapshotPath,
* @param current The current node
* @param out The DataOutputStream to write the image
* @param snapshot The possible snapshot associated with the current node
* @param toSaveSubtree Whether or not to save the subtree to fsimage. For
* reference node, its subtree may already have been
* saved before.
*/
private void saveImage(ByteBuffer currentDirName, INodeDirectory current,
DataOutputStream out, Snapshot snapshot)
DataOutputStream out, Snapshot snapshot, boolean toSaveSubtree)
throws IOException {
final ReadOnlyList<INode> children = current.getChildrenList(null);
int dirNum = 0;
Map<Snapshot, List<INodeDirectory>> snapshotDirMap = null;
if (current instanceof INodeDirectoryWithSnapshot) {
snapshotDirMap = new HashMap<Snapshot, List<INodeDirectory>>();
dirNum += ((INodeDirectoryWithSnapshot) current).
getSnapshotDirectory(snapshotDirMap);
}
// 1. Print prefix (parent directory name)
int prefixLen = currentDirName.position();
if (snapshot == null) {
@ -951,6 +956,19 @@ private void saveImage(ByteBuffer currentDirName, INodeDirectory current,
out.write(snapshotFullPathBytes);
}
if (!toSaveSubtree) {
return;
}
final ReadOnlyList<INode> children = current.getChildrenList(null);
int dirNum = 0;
Map<Snapshot, List<INodeDirectory>> snapshotDirMap = null;
if (current instanceof INodeDirectoryWithSnapshot) {
snapshotDirMap = new HashMap<Snapshot, List<INodeDirectory>>();
dirNum += ((INodeDirectoryWithSnapshot) current).
getSnapshotDirectory(snapshotDirMap);
}
// 2. Write INodeDirectorySnapshottable#snapshotsByNames to record all
// Snapshots
if (current instanceof INodeDirectorySnapshottable) {
@ -971,18 +989,25 @@ private void saveImage(ByteBuffer currentDirName, INodeDirectory current,
// deleted sub-directories
out.writeInt(dirNum); // the number of sub-directories
for(INode child : children) {
if(!child.isDirectory())
if(!child.isDirectory()) {
continue;
currentDirName.put(PATH_SEPARATOR).put(child.getLocalNameBytes());
saveImage(currentDirName, child.asDirectory(), out, snapshot);
}
// make sure we only save the subtree under a reference node once
boolean toSave = child.isReference() ?
referenceMap.toProcessSubtree(child.getId()) : true;
currentDirName.put(PATH_SEPARATOR).put(child.getLocalNameBytes());
saveImage(currentDirName, child.asDirectory(), out, snapshot, toSave);
currentDirName.position(prefixLen);
}
if (snapshotDirMap != null) {
for (Snapshot ss : snapshotDirMap.keySet()) {
List<INodeDirectory> snapshotSubDirs = snapshotDirMap.get(ss);
for (INodeDirectory subDir : snapshotSubDirs) {
// make sure we only save the subtree under a reference node once
boolean toSave = subDir.getParentReference() != null ?
referenceMap.toProcessSubtree(subDir.getId()) : true;
currentDirName.put(PATH_SEPARATOR).put(subDir.getLocalNameBytes());
saveImage(currentDirName, subDir, out, ss);
saveImage(currentDirName, subDir, out, ss, toSave);
currentDirName.position(prefixLen);
}
}

View File

@ -44,6 +44,8 @@
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableUtils;
import com.google.common.base.Preconditions;
/**
* Static utility functions for serializing various pieces of data in the correct
* format for the FSImage file.
@ -261,11 +263,19 @@ private static void writeINodeReference(INodeReference ref, DataOutput out,
out.writeLong(0); // preferred block size
out.writeInt(-3); // # of blocks
out.writeBoolean(ref instanceof INodeReference.WithName);
final boolean isWithName = ref instanceof INodeReference.WithName;
out.writeBoolean(isWithName);
if (!isWithName) {
Preconditions.checkState(ref instanceof INodeReference.DstReference);
// dst snapshot id
out.writeInt(((INodeReference.DstReference) ref).getDstSnapshotId());
}
final INodeReference.WithCount withCount
= (INodeReference.WithCount)ref.getReferredINode();
referenceMap.writeINodeReferenceWithCount(withCount, out, writeUnderConstruction);
referenceMap.writeINodeReferenceWithCount(withCount, out,
writeUnderConstruction);
}
/**
@ -275,7 +285,8 @@ public static void saveINode2Image(INode node, DataOutput out,
boolean writeUnderConstruction, ReferenceMap referenceMap)
throws IOException {
if (node.isReference()) {
writeINodeReference(node.asReference(), out, writeUnderConstruction, referenceMap);
writeINodeReference(node.asReference(), out, writeUnderConstruction,
referenceMap);
} else if (node.isDirectory()) {
writeINodeDirectory(node.asDirectory(), out);
} else if (node.isSymlink()) {

View File

@ -162,6 +162,11 @@ public final boolean isInLatestSnapshot(final Snapshot latest) {
if (latest == null) {
return false;
}
// if parent is a reference node, parent must be a renamed node. We can
// stop the check at the reference node.
if (parent != null && parent.isReference()) {
return true;
}
final INodeDirectory parentDir = getParent();
if (parentDir == null) { // root
return true;
@ -178,6 +183,32 @@ public final boolean isInLatestSnapshot(final Snapshot latest) {
}
return this == child.asReference().getReferredINode();
}
/**
 * Called by {@link INode#recordModification}. For a reference node and its
 * subtree, the function tells which snapshot the modification should be
 * associated with: the snapshot that belongs to the SRC tree of the rename
 * operation, or the snapshot belonging to the DST tree.
 *
 * @param latest
 *          the latest snapshot in the DST tree above the reference node;
 *          {@code null} means there is no such snapshot
 * @return True: the modification should be recorded in the snapshot that
 *         belongs to the SRC tree (also returned when {@code latest} is
 *         null). False: the modification should be recorded in the
 *         snapshot that belongs to the DST tree (also returned when this
 *         inode has no parent reference).
 */
public final boolean isInSrcSnapshot(final Snapshot latest) {
  if (latest == null) {
    return true;
  }
  final INodeReference withCount = getParentReference();
  if (withCount == null) {
    return false;
  }
  // The WithCount node is not guaranteed to have a parent (dst) reference.
  // Treat a missing one as "no dst snapshot" instead of dereferencing null;
  // this matches the INodeReference#getDstSnapshotId() default value.
  final INodeReference dstRef = withCount.getParentReference();
  final int dstSnapshotId = dstRef == null
      ? Snapshot.INVALID_ID : dstRef.getDstSnapshotId();
  // The dst snapshot recorded at rename time is not older than the latest
  // snapshot, i.e. no new snapshot was taken on the dst tree afterwards.
  return dstSnapshotId >= latest.getId();
}
/**
* This inode is being modified. The previous version of the inode needs to

View File

@ -224,16 +224,6 @@ public void replaceChild(final INode oldChild, final INode newChild) {
}
}
INodeReference.WithCount replaceChild4Reference(INode oldChild) {
Preconditions.checkArgument(!oldChild.isReference());
final INodeReference.WithCount withCount
= new INodeReference.WithCount(null, oldChild);
final INodeReference ref = new INodeReference(this, withCount);
withCount.setParentReference(ref);
replaceChild(oldChild, ref);
return withCount;
}
INodeReference.WithName replaceChild4ReferenceWithName(INode oldChild) {
if (oldChild instanceof INodeReference.WithName) {
return (INodeReference.WithName)oldChild;
@ -241,12 +231,13 @@ INodeReference.WithName replaceChild4ReferenceWithName(INode oldChild) {
final INodeReference.WithCount withCount;
if (oldChild.isReference()) {
withCount = (INodeReference.WithCount) oldChild.asReference().getReferredINode();
withCount = (INodeReference.WithCount) oldChild.asReference()
.getReferredINode();
} else {
withCount = new INodeReference.WithCount(null, oldChild);
}
final INodeReference.WithName ref = new INodeReference.WithName(
this, withCount, oldChild.getLocalNameBytes());
final INodeReference.WithName ref = new INodeReference.WithName(this,
withCount, oldChild.getLocalNameBytes());
replaceChild(oldChild, ref);
return ref;
}
@ -420,14 +411,44 @@ INodesInPath getExistingPathINodes(byte[][] components, int numOfINodes,
if (index >= 0) {
existing.addNode(curNode);
}
final boolean isRef = curNode.isReference();
final boolean isDir = curNode.isDirectory();
final INodeDirectory dir = isDir? curNode.asDirectory(): null;
if (isDir && dir instanceof INodeDirectoryWithSnapshot) {
if (!isRef && isDir && dir instanceof INodeDirectoryWithSnapshot) {
//if the path is a non-snapshot path, update the latest snapshot.
if (!existing.isSnapshot()) {
existing.updateLatestSnapshot(
((INodeDirectoryWithSnapshot)dir).getLastSnapshot());
}
} else if (isRef && isDir && !lastComp) {
// If the curNode is a reference node, need to check its dstSnapshot:
// 1. if the existing snapshot is no later than the dstSnapshot (which
// is the latest snapshot in dst before the rename), the changes
// should be recorded in previous snapshots (belonging to src).
// 2. however, if the ref node is already the last component, we still
// need to know the latest snapshot among the ref node's ancestors,
// in case of processing a deletion operation. Thus we do not overwrite
// the latest snapshot if lastComp is true. If the operation is a
// modification operation, we do a similar check in the corresponding
// recordModification method.
if (!existing.isSnapshot()) {
int dstSnapshotId = curNode.asReference().getDstSnapshotId();
Snapshot latest = existing.getLatestSnapshot();
if (latest == null || // no snapshot in dst tree of rename
dstSnapshotId >= latest.getId()) { // the above scenario
Snapshot lastSnapshot = null;
if (curNode.isDirectory()
&& curNode.asDirectory() instanceof INodeDirectoryWithSnapshot) {
lastSnapshot = ((INodeDirectoryWithSnapshot) curNode
.asDirectory()).getLastSnapshot();
} else if (curNode.isFile()
&& curNode.asFile() instanceof INodeFileWithSnapshot) {
lastSnapshot = ((INodeFileWithSnapshot) curNode
.asFile()).getDiffs().getLastSnapshot();
}
existing.setSnapshot(lastSnapshot);
}
}
}
if (curNode.isSymlink() && (!lastComp || (lastComp && resolveLink))) {
final String path = constructPath(components, 0, components.length);

View File

@ -54,7 +54,7 @@
* Note 2: getParent() always returns the parent in the current state, e.g.
* inode(id=1000,name=bar).getParent() returns /xyz but not /abc.
*/
public class INodeReference extends INode {
public abstract class INodeReference extends INode {
/**
* Try to remove the given reference and then return the reference count.
* If the given inode is not a reference, return -1;
@ -75,6 +75,10 @@ private static int removeReference(INodeReference ref) {
if (!(referred instanceof WithCount)) {
return -1;
}
WithCount wc = (WithCount) referred;
if (ref == wc.getParentReference()) {
wc.setParent(null);
}
return ((WithCount)referred).decrementReferenceCount();
}
@ -85,7 +89,6 @@ public INodeReference(INode parent, INode referred) {
this.referred = referred;
}
public final INode getReferredINode() {
return referred;
}
@ -276,6 +279,9 @@ public final void clear() {
public void dumpTreeRecursively(PrintWriter out, StringBuilder prefix,
final Snapshot snapshot) {
super.dumpTreeRecursively(out, prefix, snapshot);
if (this instanceof DstReference) {
out.print(", dstSnapshotId=" + ((DstReference) this).dstSnapshotId);
}
if (this instanceof WithCount) {
out.print(", count=" + ((WithCount)this).getReferenceCount());
}
@ -288,6 +294,10 @@ public void dumpTreeRecursively(PrintWriter out, StringBuilder prefix,
b.append("->");
getReferredINode().dumpTreeRecursively(out, b, snapshot);
}
/**
 * Get the dst snapshot id associated with this reference. This base
 * implementation carries no such id and always reports
 * {@link Snapshot#INVALID_ID}; subclasses may override it.
 *
 * @return {@link Snapshot#INVALID_ID}
 */
public int getDstSnapshotId() {
return Snapshot.INVALID_ID;
}
/** An anonymous reference with reference count. */
public static class WithCount extends INodeReference {
@ -336,4 +346,29 @@ public final void setLocalName(byte[] name) {
+ " is immutable.");
}
}
/**
 * A reference that additionally remembers the id of a dst snapshot, see
 * {@link #dstSnapshotId}.
 */
public static class DstReference extends INodeReference {
  /**
   * Record the latest snapshot of the dst subtree before the rename. For
   * later operations on the moved/renamed files/directories, if the latest
   * snapshot is after this dstSnapshot, changes will be recorded to the
   * latest snapshot. Otherwise changes will be recorded to the snapshot
   * belonging to the src of the rename.
   *
   * {@link Snapshot#INVALID_ID} means no dstSnapshot (e.g., src of the
   * first-time rename).
   */
  private final int dstSnapshotId;

  /**
   * @param parent the directory passed to the super constructor as parent
   * @param referred the WithCount node passed to the super constructor
   * @param dstSnapshotId the value stored in {@link #dstSnapshotId}
   */
  public DstReference(INodeDirectory parent, WithCount referred,
      final int dstSnapshotId) {
    super(parent, referred);
    this.dstSnapshotId = dstSnapshotId;
  }

  /** @return the dst snapshot id given at construction time. */
  @Override
  public final int getDstSnapshotId() {
    return dstSnapshotId;
  }
}
}

View File

@ -132,6 +132,7 @@ abstract Quota.Counts combinePosteriorAndCollectBlocks(final N currentINode,
/**
* Delete and clear self.
* @param currentINode The inode where the deletion happens.
* @param collectedBlocks Used to collect blocks for deletion.
* @return quota usage delta
*/

View File

@ -136,7 +136,7 @@ public final D getLast() {
}
/** @return the last snapshot. */
final Snapshot getLastSnapshot() {
public final Snapshot getLastSnapshot() {
final AbstractINodeDiff<N, D> last = getLast();
return last == null? null: last.getSnapshot();
}
@ -147,7 +147,7 @@ final Snapshot getLastSnapshot() {
* snapshot.
* @return The latest snapshot before the given snapshot.
*/
final Snapshot getPrior(Snapshot anchor) {
private final Snapshot getPrior(Snapshot anchor) {
if (anchor == null) {
return getLastSnapshot();
}
@ -159,6 +159,18 @@ final Snapshot getPrior(Snapshot anchor) {
return diffs.get(priorIndex).getSnapshot();
}
}
/**
 * Update the prior snapshot: look up the snapshot that {@link #getPrior}
 * reports for the given snapshot and keep whichever of it and the supplied
 * prior is later according to {@link Snapshot#ID_COMPARATOR}.
 *
 * @param snapshot the anchor snapshot handed to {@link #getPrior}
 * @param prior the currently known prior snapshot, may be null
 * @return the snapshot found by {@link #getPrior} if it is non-null and
 *         either {@code prior} is null or it compares greater than
 *         {@code prior}; otherwise {@code prior} unchanged
 */
final Snapshot updatePrior(Snapshot snapshot, Snapshot prior) {
  final Snapshot candidate = getPrior(snapshot);
  if (candidate == null) {
    // nothing found in this diff list, keep what the caller had
    return prior;
  }
  if (prior != null
      && Snapshot.ID_COMPARATOR.compare(candidate, prior) <= 0) {
    // the caller's prior is at least as late as the candidate
    return prior;
  }
  return candidate;
}
/**
* @return the diff corresponding to the given snapshot.

View File

@ -109,13 +109,16 @@ private Quota.Counts destroyCreatedList(
/** clear the deleted list */
private Quota.Counts destroyDeletedList(
final BlocksMapUpdateInfo collectedBlocks) {
final BlocksMapUpdateInfo collectedBlocks,
final List<INodeReference> refNodes) {
Quota.Counts counts = Quota.Counts.newInstance();
final List<INode> deletedList = getList(ListType.DELETED);
for (INode d : deletedList) {
if (INodeReference.tryRemoveReference(d) <= 0) {
d.computeQuotaUsage(counts, false);
d.destroyAndCollectBlocks(collectedBlocks);
} else {
refNodes.add(d.asReference());
}
}
deletedList.clear();
@ -269,6 +272,23 @@ public void process(INode inode) {
if (INodeReference.tryRemoveReference(inode) <= 0) {
inode.computeQuotaUsage(counts, false);
inode.destroyAndCollectBlocks(collectedBlocks);
} else {
// if the node is a reference node, we should continue the
// snapshot deletion process
try {
// use null as prior here because we are handling a reference
// node stored in the created list of a snapshot diff. This
// snapshot diff must be associated with the latest snapshot of
// the dst tree before the rename operation. In this scenario,
// the prior snapshot should be the one created in the src tree,
// and it can be identified by the cleanSubtree since we call
// recordModification before the rename.
counts.add(inode.cleanSubtree(posterior.snapshot, null,
collectedBlocks));
} catch (QuotaExceededException e) {
String error = "should not have QuotaExceededException while deleting snapshot";
LOG.error(error, e);
}
}
}
}
@ -367,7 +387,28 @@ Quota.Counts destroyDiffAndCollectBlocks(INodeDirectory currentINode,
BlocksMapUpdateInfo collectedBlocks) {
// this diff has been deleted
Quota.Counts counts = Quota.Counts.newInstance();
counts.add(diff.destroyDeletedList(collectedBlocks));
List<INodeReference> refNodes = new ArrayList<INodeReference>();
counts.add(diff.destroyDeletedList(collectedBlocks, refNodes));
for (INodeReference ref : refNodes) {
// if the node is a reference node, we should continue the
// snapshot deletion process
try {
// Use null as prior snapshot. We are handling a reference node stored
// in the delete list of this snapshot diff. We need to destroy this
// snapshot diff because it is the very first one in history.
// If the ref node is a WithName instance acting as the src node of
// the rename operation, there will not be any snapshot before the
// snapshot to be deleted. If the ref node represents the dst node of a
// rename operation, we can identify the corresponding prior snapshot
// when we come into the subtree of the ref node.
counts.add(ref.cleanSubtree(this.snapshot, null, collectedBlocks));
} catch (QuotaExceededException e) {
String error =
"should not have QuotaExceededException while deleting snapshot "
+ this.snapshot;
LOG.error(error, e);
}
}
return counts;
}
}
@ -511,8 +552,10 @@ public INodeDirectory getSnapshotINode(Snapshot snapshot) {
@Override
public INodeDirectoryWithSnapshot recordModification(final Snapshot latest)
throws QuotaExceededException {
return isInLatestSnapshot(latest)?
saveSelf2Snapshot(latest, null): this;
if (isInLatestSnapshot(latest) && !isInSrcSnapshot(latest)) {
return saveSelf2Snapshot(latest, null);
}
return this;
}
/** Save the snapshot copy to the latest snapshot. */
@ -604,16 +647,6 @@ public void replaceChild(final INode oldChild, final INode newChild) {
diffs.replaceChild(ListType.CREATED, oldChild, newChild);
}
/** The child just has been removed, replace it with a reference. */
public INodeReference.WithName replaceRemovedChild4Reference(
INode oldChild, INodeReference.WithCount newChild, byte[] childName) {
final INodeReference.WithName ref = new INodeReference.WithName(this,
newChild, childName);
newChild.incrementReferenceCount();
replaceRemovedChild(oldChild, ref);
return ref;
}
/** The child just has been removed, replace it with a reference. */
public void replaceRemovedChild(INode oldChild, INode newChild) {
// the old child must be in the deleted list
@ -673,11 +706,7 @@ public Quota.Counts cleanSubtree(final Snapshot snapshot, Snapshot prior,
}
} else {
// update prior
Snapshot s = getDiffs().getPrior(snapshot);
if (s != null &&
(prior == null || Snapshot.ID_COMPARATOR.compare(s, prior) > 0)) {
prior = s;
}
prior = getDiffs().updatePrior(snapshot, prior);
counts.add(getDiffs().deleteSnapshotDiff(snapshot, prior, this,
collectedBlocks));
if (prior != null) {

View File

@ -95,7 +95,7 @@ public INodeFile getSnapshotINode(Snapshot snapshot) {
@Override
public INodeFileUnderConstructionWithSnapshot recordModification(
final Snapshot latest) throws QuotaExceededException {
if (isInLatestSnapshot(latest)) {
if (isInLatestSnapshot(latest) && !isInSrcSnapshot(latest)) {
diffs.saveSelf2Snapshot(latest, this, null);
}
return this;
@ -121,6 +121,7 @@ public Quota.Counts cleanSubtree(final Snapshot snapshot, Snapshot prior,
Util.collectBlocksAndClear(this, collectedBlocks);
return Quota.Counts.newInstance();
} else { // delete a snapshot
prior = getDiffs().updatePrior(snapshot, prior);
return diffs.deleteSnapshotDiff(snapshot, prior, this, collectedBlocks);
}
}

View File

@ -66,7 +66,7 @@ public INodeFile getSnapshotINode(Snapshot snapshot) {
@Override
public INodeFileWithSnapshot recordModification(final Snapshot latest)
throws QuotaExceededException {
if (isInLatestSnapshot(latest)) {
if (isInLatestSnapshot(latest) && !isInSrcSnapshot(latest)) {
diffs.saveSelf2Snapshot(latest, this, null);
}
return this;
@ -92,6 +92,7 @@ public Quota.Counts cleanSubtree(final Snapshot snapshot, Snapshot prior,
Util.collectBlocksAndClear(this, collectedBlocks);
return Quota.Counts.newInstance();
} else { // delete a snapshot
prior = getDiffs().updatePrior(snapshot, prior);
return diffs.deleteSnapshotDiff(snapshot, prior, this, collectedBlocks);
}
}

View File

@ -35,6 +35,8 @@
/** Snapshot of a sub-tree in the namesystem. */
@InterfaceAudience.Private
public class Snapshot implements Comparable<byte[]> {
public static final int INVALID_ID = -1;
/**
* Compare snapshot IDs. Null indicates the current status thus is greater
* than non-null snapshots.
@ -69,12 +71,8 @@ public static Snapshot findLatestSnapshot(INode inode, Snapshot anchor) {
if (inode.isDirectory()) {
final INodeDirectory dir = inode.asDirectory();
if (dir instanceof INodeDirectoryWithSnapshot) {
final Snapshot s = ((INodeDirectoryWithSnapshot)dir).getDiffs()
.getPrior(anchor);
if (latest == null
|| (s != null && ID_COMPARATOR.compare(latest, s) < 0)) {
latest = s;
}
latest = ((INodeDirectoryWithSnapshot) dir).getDiffs().updatePrior(
anchor, latest);
}
}
}

View File

@ -307,11 +307,19 @@ private static DirectoryDiff loadDirectoryDiff(
/** A reference map for fsimage serialization. */
public static class ReferenceMap {
/**
* Used to indicate whether the reference node itself has been saved
*/
private final Map<Long, INodeReference.WithCount> referenceMap
= new HashMap<Long, INodeReference.WithCount>();
/**
* Used to record whether the subtree of the reference node has been saved
*/
private final Map<Long, Long> dirMap = new HashMap<Long, Long>();
public void writeINodeReferenceWithCount(INodeReference.WithCount withCount,
DataOutput out, boolean writeUnderConstruction) throws IOException {
public void writeINodeReferenceWithCount(
INodeReference.WithCount withCount, DataOutput out,
boolean writeUnderConstruction) throws IOException {
final INode referred = withCount.getReferredINode();
final long id = withCount.getId();
final boolean firstReferred = !referenceMap.containsKey(id);
@ -326,6 +334,15 @@ public void writeINodeReferenceWithCount(INodeReference.WithCount withCount,
}
}
/**
 * Decide whether the subtree identified by {@code id} still needs to be
 * processed, and mark it as seen.
 *
 * @param id the id to look up in (and, if absent, add to) {@code dirMap}
 * @return {@code true} the first time this id is passed in; {@code false}
 *         on every later call with the same id
 */
public boolean toProcessSubtree(long id) {
final boolean firstVisit = !dirMap.containsKey(id);
if (firstVisit) {
// remember the id so that subsequent calls report it as already handled
dirMap.put(id, id);
}
return firstVisit;
}
public INodeReference.WithCount loadINodeReferenceWithCount(
boolean isSnapshotINode, DataInput in, FSImageFormat.Loader loader
) throws IOException {

View File

@ -22,6 +22,7 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
@ -271,7 +272,16 @@ public SnapshottableDirectoryStatus[] getSnapshottableDirListing(
public void removeSnapshottableDirs(
List<INodeDirectorySnapshottable> toRemoveList) {
if (toRemoveList != null) {
this.snapshottables.removeAll(toRemoveList);
Iterator<INodeDirectorySnapshottable> iter = snapshottables.iterator();
while (iter.hasNext()) {
INodeDirectorySnapshottable next = iter.next();
for (INodeDirectorySnapshottable toRemove : toRemoveList) {
if (next == toRemove) {
iter.remove();
break;
}
}
}
// modify the numSnapshottableDirs metrics
numSnapshottableDirs.addAndGet(-toRemoveList.size());
}

View File

@ -236,7 +236,7 @@ void checkImage(int s) throws IOException {
// dump the fsdir tree
File fsnBetween = dumpTree2File(name + "_between");
SnapshotTestHelper.compareDumpedTreeInFile(fsnBefore, fsnBetween);
SnapshotTestHelper.compareDumpedTreeInFile(fsnBefore, fsnBetween, true);
// restart the cluster, and format the cluster
cluster = new MiniDFSCluster.Builder(conf).format(true)
@ -252,7 +252,7 @@ void checkImage(int s) throws IOException {
File fsnAfter = dumpTree2File(name + "_after");
// compare two dumped tree
SnapshotTestHelper.compareDumpedTreeInFile(fsnBefore, fsnAfter);
SnapshotTestHelper.compareDumpedTreeInFile(fsnBefore, fsnAfter, true);
long numSdirAfter = fsn.getNumSnapshottableDirs();
long numSnapshotAfter = fsn.getNumSnapshots();
@ -323,7 +323,7 @@ public void testSaveLoadImageWithAppending() throws Exception {
File fsnAfter = dumpTree2File("after");
// compare two dumped tree
SnapshotTestHelper.compareDumpedTreeInFile(fsnBefore, fsnAfter);
SnapshotTestHelper.compareDumpedTreeInFile(fsnBefore, fsnAfter, true);
}
/**

View File

@ -187,17 +187,18 @@ public static void checkSnapshotCreation(DistributedFileSystem hdfs,
* </pre>
* @see INode#dumpTreeRecursively()
*/
public static void compareDumpedTreeInFile(File file1, File file2)
throws IOException {
public static void compareDumpedTreeInFile(File file1, File file2,
boolean compareQuota) throws IOException {
try {
compareDumpedTreeInFile(file1, file2, false);
compareDumpedTreeInFile(file1, file2, compareQuota, false);
} catch(Throwable t) {
LOG.info("FAILED compareDumpedTreeInFile(" + file1 + ", " + file2 + ")", t);
compareDumpedTreeInFile(file1, file2, true);
compareDumpedTreeInFile(file1, file2, compareQuota, true);
}
}
private static void compareDumpedTreeInFile(File file1, File file2,
boolean print) throws IOException {
boolean compareQuota, boolean print) throws IOException {
if (print) {
printFile(file1);
printFile(file2);
@ -227,6 +228,11 @@ private static void compareDumpedTreeInFile(File file1, File file2,
line1 = line1.replaceAll("replicas=\\[.*\\]", "replicas=[]");
line2 = line2.replaceAll("replicas=\\[.*\\]", "replicas=[]");
if (!compareQuota) {
line1 = line1.replaceAll("Quota\\[.*\\]", "Quota[]");
line2 = line2.replaceAll("Quota\\[.*\\]", "Quota[]");
}
// skip the specific fields of BlockInfoUnderConstruction when the node
// is an INodeFileSnapshot or an INodeFileUnderConstructionSnapshot
if (line1.contains("(INodeFileSnapshot)")

View File

@ -204,8 +204,8 @@ private void checkFSImage() throws Exception {
// dump the namespace loaded from fsimage
SnapshotTestHelper.dumpTree2File(fsdir, fsnAfter);
SnapshotTestHelper.compareDumpedTreeInFile(fsnBefore, fsnMiddle);
SnapshotTestHelper.compareDumpedTreeInFile(fsnBefore, fsnAfter);
SnapshotTestHelper.compareDumpedTreeInFile(fsnBefore, fsnMiddle, true);
SnapshotTestHelper.compareDumpedTreeInFile(fsnBefore, fsnAfter, true);
}
/**

View File

@ -140,8 +140,7 @@ public void testDeleteDirectoryWithSnapshot2() throws Exception {
// Deleting dir while its descendant subsub1 has snapshots should fail
exception.expect(RemoteException.class);
String error = "The direcotry " + dir.toString()
+ " cannot be deleted since " + subsub.toString()
String error = subsub.toString()
+ " is snapshottable and already has snapshots";
exception.expectMessage(error);
hdfs.delete(dir, true);