diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index aa3e31dc65..ccfe9b7df4 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -306,6 +306,9 @@ Release 2.2.0 - UNRELEASED HADOOP-9673. NetworkTopology: when a node can't be added, print out its location for diagnostic purposes. (Colin Patrick McCabe) + HADOOP-9414. Refactor out FSLinkResolver and relevant helper methods. + (Andrew Wang via Colin Patrick McCabe) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java index 92e3e6a463..c381f7c3cc 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java @@ -729,6 +729,7 @@ public abstract class AbstractFileSystem { /** * Returns true if the file system supports symlinks, false otherwise. + * @return true if filesystem supports symlinks */ public boolean supportsSymlinks() { return false; @@ -744,8 +745,9 @@ public abstract class AbstractFileSystem { } /** - * The specification of this method matches that of - * {@link FileContext#getLinkTarget(Path)}; + * Partially resolves the path. This is used during symlink resolution in + * {@link FSLinkResolver}, and differs from the similarly named method + * {@link FileContext#getLinkTarget(Path)}. */ public Path getLinkTarget(final Path f) throws IOException { /* We should never get here. Any file system that threw an diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSLinkResolver.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSLinkResolver.java new file mode 100644 index 0000000000..625e5f24ed --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSLinkResolver.java @@ -0,0 +1,118 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.IOException; +import java.net.URI; + +/** + * Class used to perform an operation on and resolve symlinks in a + * path. The operation may potentially span multiple file systems. + */ +public abstract class FSLinkResolver { + + private static final int MAX_PATH_LINKS = 32; + + /** + * See {@link #qualifySymlinkTarget(URI, Path, Path)} + */ + public static Path qualifySymlinkTarget(final AbstractFileSystem pathFS, + Path pathWithLink, Path target) { + return qualifySymlinkTarget(pathFS.getUri(), pathWithLink, target); + } + + /** + * See {@link #qualifySymlinkTarget(URI, Path, Path)} + */ + public static Path qualifySymlinkTarget(final FileSystem pathFS, + Path pathWithLink, Path target) { + return qualifySymlinkTarget(pathFS.getUri(), pathWithLink, target); + } + + /** + * Return a fully-qualified version of the given symlink target if it + * has no scheme and authority. Partially and fully-qualified paths + * are returned unmodified. + * @param pathURI URI of the filesystem of pathWithLink + * @param pathWithLink Path that contains the symlink + * @param target The symlink's absolute target + * @return Fully qualified version of the target. + */ + private static Path qualifySymlinkTarget(final URI pathURI, + Path pathWithLink, Path target) { + // NB: makeQualified uses the target's scheme and authority, if + // specified, and the scheme and authority of pathURI, if not. + final URI targetUri = target.toUri(); + final String scheme = targetUri.getScheme(); + final String auth = targetUri.getAuthority(); + return (scheme == null && auth == null) ? target.makeQualified(pathURI, + pathWithLink.getParent()) : target; + } + + // FileContext / AbstractFileSystem resolution methods + + /** + * Generic helper function overridden on instantiation to perform a + * specific operation on the given file system using the given path + * which may result in an UnresolvedLinkException. + * @param fs AbstractFileSystem to perform the operation on. + * @param p Path given the file system. + * @return Generic type determined by the specific implementation. + * @throws UnresolvedLinkException If symbolic link path could + * not be resolved + * @throws IOException an I/O error occurred + */ + public T next(final AbstractFileSystem fs, final Path p) + throws IOException, UnresolvedLinkException { + throw new AssertionError("Should not be called without first overriding!"); + } + + /** + * Performs the operation specified by the next function, calling it + * repeatedly until all symlinks in the given path are resolved. + * @param fc FileContext used to access file systems. + * @param path The path to resolve symlinks on. + * @return Generic type determined by the implementation of next. + * @throws IOException + */ + public T resolve(final FileContext fc, final Path path) throws IOException { + int count = 0; + T in = null; + Path p = path; + // NB: More than one AbstractFileSystem can match a scheme, eg + // "file" resolves to LocalFs but could have come by RawLocalFs. + AbstractFileSystem fs = fc.getFSofPath(p); + + // Loop until all symlinks are resolved or the limit is reached + for (boolean isLink = true; isLink;) { + try { + in = next(fs, p); + isLink = false; + } catch (UnresolvedLinkException e) { + if (count++ > MAX_PATH_LINKS) { + throw new IOException("Possible cyclic loop while " + + "following symbolic link " + path); + } + // Resolve the first unresolved path component + p = FSLinkResolver.qualifySymlinkTarget(fs, p, fs.getLinkTarget(p)); + fs = fc.getFSofPath(p); + } + } + return in; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java index 26f50503fe..32a58bf83b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java @@ -282,17 +282,6 @@ public final class FileContext { DELETE_ON_EXIT.clear(); } } - - /** - * Pathnames with scheme and relative path are illegal. - * @param path to be checked - */ - private static void checkNotSchemeWithRelative(final Path path) { - if (path.toUri().isAbsolute() && !path.isUriPathAbsolute()) { - throw new HadoopIllegalArgumentException( - "Unsupported name: has scheme but relative path-part"); - } - } /** * Get the file system of supplied path. @@ -305,13 +294,10 @@ public final class FileContext { * @throws IOExcepton If the file system for absOrFqPath could * not be instantiated. */ - private AbstractFileSystem getFSofPath(final Path absOrFqPath) + protected AbstractFileSystem getFSofPath(final Path absOrFqPath) throws UnsupportedFileSystemException, IOException { - checkNotSchemeWithRelative(absOrFqPath); - if (!absOrFqPath.isAbsolute() && absOrFqPath.toUri().getScheme() == null) { - throw new HadoopIllegalArgumentException( - "FileContext Bug: path is relative"); - } + absOrFqPath.checkNotSchemeWithRelative(); + absOrFqPath.checkNotRelative(); try { // Is it the default FS for this FileContext? @@ -513,7 +499,7 @@ public final class FileContext { * */ public void setWorkingDirectory(final Path newWDir) throws IOException { - checkNotSchemeWithRelative(newWDir); + newWDir.checkNotSchemeWithRelative(); /* wd is stored as a fully qualified path. We check if the given * path is not relative first since resolve requires and returns * an absolute path. @@ -1118,26 +1104,6 @@ public final class FileContext { }.resolve(this, absF); } - /** - * Return a fully qualified version of the given symlink target if it - * has no scheme and authority. Partially and fully qualified paths - * are returned unmodified. - * @param pathFS The AbstractFileSystem of the path - * @param pathWithLink Path that contains the symlink - * @param target The symlink's absolute target - * @return Fully qualified version of the target. - */ - private static Path qualifySymlinkTarget(final AbstractFileSystem pathFS, - Path pathWithLink, Path target) { - // NB: makeQualified uses the target's scheme and authority, if - // specified, and the scheme and authority of pathFS, if not. - final String scheme = target.toUri().getScheme(); - final String auth = target.toUri().getAuthority(); - return (scheme == null && auth == null) - ? target.makeQualified(pathFS.getUri(), pathWithLink.getParent()) - : target; - } - /** * Return a file status object that represents the path. If the path * refers to a symlink then the FileStatus of the symlink is returned. @@ -2156,9 +2122,9 @@ public final class FileContext { boolean overwrite) throws AccessControlException, FileAlreadyExistsException, FileNotFoundException, ParentNotDirectoryException, UnsupportedFileSystemException, - IOException { - checkNotSchemeWithRelative(src); - checkNotSchemeWithRelative(dst); + IOException { + src.checkNotSchemeWithRelative(); + dst.checkNotSchemeWithRelative(); Path qSrc = makeQualified(src); Path qDst = makeQualified(dst); checkDest(qSrc.getName(), qDst, overwrite); @@ -2324,64 +2290,7 @@ public final class FileContext { }.resolve(this, absF); return result; } - - /** - * Class used to perform an operation on and resolve symlinks in a - * path. The operation may potentially span multiple file systems. - */ - protected static abstract class FSLinkResolver { - // The maximum number of symbolic link components in a path - private static final int MAX_PATH_LINKS = 32; - /** - * Generic helper function overridden on instantiation to perform a - * specific operation on the given file system using the given path - * which may result in an UnresolvedLinkException. - * @param fs AbstractFileSystem to perform the operation on. - * @param p Path given the file system. - * @return Generic type determined by the specific implementation. - * @throws UnresolvedLinkException If symbolic link path could - * not be resolved - * @throws IOException an I/O error occured - */ - public abstract T next(final AbstractFileSystem fs, final Path p) - throws IOException, UnresolvedLinkException; - - /** - * Performs the operation specified by the next function, calling it - * repeatedly until all symlinks in the given path are resolved. - * @param fc FileContext used to access file systems. - * @param p The path to resolve symlinks in. - * @return Generic type determined by the implementation of next. - * @throws IOException - */ - public T resolve(final FileContext fc, Path p) throws IOException { - int count = 0; - T in = null; - Path first = p; - // NB: More than one AbstractFileSystem can match a scheme, eg - // "file" resolves to LocalFs but could have come by RawLocalFs. - AbstractFileSystem fs = fc.getFSofPath(p); - - // Loop until all symlinks are resolved or the limit is reached - for (boolean isLink = true; isLink;) { - try { - in = next(fs, p); - isLink = false; - } catch (UnresolvedLinkException e) { - if (count++ > MAX_PATH_LINKS) { - throw new IOException("Possible cyclic loop while " + - "following symbolic link " + first); - } - // Resolve the first unresolved path component - p = qualifySymlinkTarget(fs, p, fs.getLinkTarget(p)); - fs = fc.getFSofPath(p); - } - } - return in; - } - } - /** * Get the statistics for a particular file system * diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Path.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Path.java index feef1c7bab..4b50882eae 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Path.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Path.java @@ -25,6 +25,7 @@ import java.util.regex.Pattern; import org.apache.avro.reflect.Stringable; import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -57,6 +58,32 @@ public class Path implements Comparable { private URI uri; // a hierarchical uri + /** + * Pathnames with scheme and relative path are illegal. + * @param path to be checked + */ + void checkNotSchemeWithRelative() { + if (toUri().isAbsolute() && !isUriPathAbsolute()) { + throw new HadoopIllegalArgumentException( + "Unsupported name: has scheme but relative path-part"); + } + } + + void checkNotRelative() { + if (!isAbsolute() && toUri().getScheme() == null) { + throw new HadoopIllegalArgumentException("Path is relative"); + } + } + + public static Path getPathWithoutSchemeAndAuthority(Path path) { + // This code depends on Path.toString() to remove the leading slash before + // the drive specification on Windows. + Path newPath = path.isUriPathAbsolute() ? + new Path(null, null, path.toUri().getPath()) : + path; + return newPath; + } + /** Resolve a child path against a parent path. */ public Path(String parent, String child) { this(new Path(parent), new Path(child)); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/local/RawLocalFs.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/local/RawLocalFs.java index f4ce75e1b2..6fe12b94df 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/local/RawLocalFs.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/local/RawLocalFs.java @@ -17,20 +17,20 @@ */ package org.apache.hadoop.fs.local; -import java.io.IOException; -import java.io.File; import java.io.FileNotFoundException; +import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.AbstractFileSystem; import org.apache.hadoop.fs.DelegateToFileSystem; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FsConstants; import org.apache.hadoop.fs.FsServerDefaults; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RawLocalFileSystem; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.util.Shell; @@ -91,8 +91,8 @@ public class RawLocalFs extends DelegateToFileSystem { // NB: Use createSymbolicLink in java.nio.file.Path once available try { Shell.execCommand(Shell.getSymlinkCommand( - getPathWithoutSchemeAndAuthority(target).getPath(), - getPathWithoutSchemeAndAuthority(link).getPath())); + Path.getPathWithoutSchemeAndAuthority(target).toString(), + Path.getPathWithoutSchemeAndAuthority(link).toString())); } catch (IOException x) { throw new IOException("Unable to create symlink: "+x.getMessage()); } @@ -175,13 +175,4 @@ public class RawLocalFs extends DelegateToFileSystem { */ throw new AssertionError(); } - - private static File getPathWithoutSchemeAndAuthority(Path path) { - Path newPath = path.isUriPathAbsolute() ? - new Path(null, null, path.toUri().getPath()) : - path; - - // Path.toString() removes leading slash before drive spec on Windows. - return new File(newPath.toString()); - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/Hdfs.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/Hdfs.java index edefa53e8a..b1df90aca0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/Hdfs.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/Hdfs.java @@ -123,7 +123,7 @@ public class Hdfs extends AbstractFileSystem { throws IOException, UnresolvedLinkException { HdfsFileStatus fi = dfs.getFileInfo(getUriPath(f)); if (fi != null) { - return makeQualified(fi, f); + return fi.makeQualified(getUri(), f); } else { throw new FileNotFoundException("File does not exist: " + f.toString()); } @@ -134,33 +134,10 @@ public class Hdfs extends AbstractFileSystem { throws IOException, UnresolvedLinkException { HdfsFileStatus fi = dfs.getFileLinkInfo(getUriPath(f)); if (fi != null) { - return makeQualified(fi, f); + return fi.makeQualified(getUri(), f); } else { throw new FileNotFoundException("File does not exist: " + f); } - } - - private FileStatus makeQualified(HdfsFileStatus f, Path parent) { - // NB: symlink is made fully-qualified in FileContext. - return new FileStatus(f.getLen(), f.isDir(), f.getReplication(), - f.getBlockSize(), f.getModificationTime(), - f.getAccessTime(), - f.getPermission(), f.getOwner(), f.getGroup(), - f.isSymlink() ? new Path(f.getSymlink()) : null, - (f.getFullPath(parent)).makeQualified( - getUri(), null)); // fully-qualify path - } - - private LocatedFileStatus makeQualifiedLocated( - HdfsLocatedFileStatus f, Path parent) { - return new LocatedFileStatus(f.getLen(), f.isDir(), f.getReplication(), - f.getBlockSize(), f.getModificationTime(), - f.getAccessTime(), - f.getPermission(), f.getOwner(), f.getGroup(), - f.isSymlink() ? new Path(f.getSymlink()) : null, - (f.getFullPath(parent)).makeQualified( - getUri(), null), // fully-qualify path - DFSUtil.locatedBlocks2Locations(f.getBlockLocations())); } @Override @@ -181,7 +158,8 @@ public class Hdfs extends AbstractFileSystem { @Override public LocatedFileStatus next() throws IOException { - return makeQualifiedLocated((HdfsLocatedFileStatus)getNext(), p); + return ((HdfsLocatedFileStatus)getNext()).makeQualifiedLocated( + getUri(), p); } }; } @@ -194,7 +172,7 @@ public class Hdfs extends AbstractFileSystem { @Override public FileStatus next() throws IOException { - return makeQualified(getNext(), f); + return getNext().makeQualified(getUri(), f); } }; } @@ -278,7 +256,7 @@ public class Hdfs extends AbstractFileSystem { if (!thisListing.hasMore()) { // got all entries of the directory FileStatus[] stats = new FileStatus[partialListing.length]; for (int i = 0; i < partialListing.length; i++) { - stats[i] = makeQualified(partialListing[i], f); + stats[i] = partialListing[i].makeQualified(getUri(), f); } return stats; } @@ -291,7 +269,7 @@ public class Hdfs extends AbstractFileSystem { new ArrayList(totalNumEntries); // add the first batch of entries to the array list for (HdfsFileStatus fileStatus : partialListing) { - listing.add(makeQualified(fileStatus, f)); + listing.add(fileStatus.makeQualified(getUri(), f)); } // now fetch more entries @@ -305,7 +283,7 @@ public class Hdfs extends AbstractFileSystem { partialListing = thisListing.getPartialListing(); for (HdfsFileStatus fileStatus : partialListing) { - listing.add(makeQualified(fileStatus, f)); + listing.add(fileStatus.makeQualified(getUri(), f)); } } while (thisListing.hasMore()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsFileStatus.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsFileStatus.java index 2d8cd24ec5..ebc6f5c659 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsFileStatus.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsFileStatus.java @@ -17,8 +17,11 @@ */ package org.apache.hadoop.hdfs.protocol; +import java.net.URI; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSUtil; @@ -238,4 +241,14 @@ public class HdfsFileStatus { final public int getChildrenNum() { return childrenNum; } + + final public FileStatus makeQualified(URI defaultUri, Path path) { + return new FileStatus(getLen(), isDir(), getReplication(), + getBlockSize(), getModificationTime(), + getAccessTime(), + getPermission(), getOwner(), getGroup(), + isSymlink() ? new Path(getSymlink()) : null, + (getFullPath(path)).makeQualified( + defaultUri, null)); // fully-qualify path + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsLocatedFileStatus.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsLocatedFileStatus.java index 0949cece7e..401f5e3ab6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsLocatedFileStatus.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsLocatedFileStatus.java @@ -17,9 +17,14 @@ */ package org.apache.hadoop.hdfs.protocol; +import java.net.URI; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.DFSUtil; /** * Interface that represents the over the wire information @@ -61,4 +66,16 @@ public class HdfsLocatedFileStatus extends HdfsFileStatus { public LocatedBlocks getBlockLocations() { return locations; } + + final public LocatedFileStatus makeQualifiedLocated(URI defaultUri, + Path path) { + return new LocatedFileStatus(getLen(), isDir(), getReplication(), + getBlockSize(), getModificationTime(), + getAccessTime(), + getPermission(), getOwner(), getGroup(), + isSymlink() ? new Path(getSymlink()) : null, + (getFullPath(path)).makeQualified( + defaultUri, null), // fully-qualify path + DFSUtil.locatedBlocks2Locations(getBlockLocations())); + } }