From 33fe54a25f04673048f0f0db4abaa42535f043ec Mon Sep 17 00:00:00 2001 From: Colin McCabe Date: Wed, 3 Jul 2013 23:29:08 +0000 Subject: [PATCH] HADOOP-9416. Add new symlink resolution methods in FileSystem and FileSystemLinkResolver. (awang via cmccabe) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1499602 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 3 + .../org/apache/hadoop/fs/FSLinkResolver.java | 44 +++------ .../org/apache/hadoop/fs/FileContext.java | 3 +- .../java/org/apache/hadoop/fs/FileSystem.java | 78 ++++++++++++++- .../hadoop/fs/FileSystemLinkResolver.java | 99 +++++++++++++++++++ .../apache/hadoop/fs/FilterFileSystem.java | 27 +++++ .../org/apache/hadoop/fs/FsConstants.java | 6 +- .../hadoop/fs/FileSystemTestWrapper.java | 28 ++++-- .../hadoop/fs/TestFilterFileSystem.java | 1 + 9 files changed, 244 insertions(+), 45 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemLinkResolver.java diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 66e0522c2c..db002a4b70 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -309,6 +309,9 @@ Release 2.2.0 - UNRELEASED HADOOP-9414. Refactor out FSLinkResolver and relevant helper methods. (Andrew Wang via Colin Patrick McCabe) + HADOOP-9416. Add new symlink resolution methods in FileSystem and + FileSystemLinkResolver. (Andrew Wang via Colin Patrick McCabe) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSLinkResolver.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSLinkResolver.java index 625e5f24ed..e5718bedf8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSLinkResolver.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSLinkResolver.java @@ -20,30 +20,20 @@ import java.io.IOException; import java.net.URI; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + /** - * Class used to perform an operation on and resolve symlinks in a - * path. The operation may potentially span multiple file systems. + * Used primarily by {@link FileContext} to operate on and resolve + * symlinks in a path. Operations can potentially span multiple + * {@link AbstractFileSystem}s. + * + * @see FileSystemLinkResolver */ +@InterfaceAudience.Private +@InterfaceStability.Evolving public abstract class FSLinkResolver { - private static final int MAX_PATH_LINKS = 32; - - /** - * See {@link #qualifySymlinkTarget(URI, Path, Path)} - */ - public static Path qualifySymlinkTarget(final AbstractFileSystem pathFS, - Path pathWithLink, Path target) { - return qualifySymlinkTarget(pathFS.getUri(), pathWithLink, target); - } - - /** - * See {@link #qualifySymlinkTarget(URI, Path, Path)} - */ - public static Path qualifySymlinkTarget(final FileSystem pathFS, - Path pathWithLink, Path target) { - return qualifySymlinkTarget(pathFS.getUri(), pathWithLink, target); - } - /** * Return a fully-qualified version of the given symlink target if it * has no scheme and authority. Partially and fully-qualified paths @@ -53,7 +43,7 @@ public static Path qualifySymlinkTarget(final FileSystem pathFS, * @param target The symlink's absolute target * @return Fully qualified version of the target. */ - private static Path qualifySymlinkTarget(final URI pathURI, + public static Path qualifySymlinkTarget(final URI pathURI, Path pathWithLink, Path target) { // NB: makeQualified uses the target's scheme and authority, if // specified, and the scheme and authority of pathURI, if not. @@ -64,8 +54,6 @@ private static Path qualifySymlinkTarget(final URI pathURI, pathWithLink.getParent()) : target; } - // FileContext / AbstractFileSystem resolution methods - /** * Generic helper function overridden on instantiation to perform a * specific operation on the given file system using the given path @@ -77,10 +65,8 @@ private static Path qualifySymlinkTarget(final URI pathURI, * not be resolved * @throws IOException an I/O error occurred */ - public T next(final AbstractFileSystem fs, final Path p) - throws IOException, UnresolvedLinkException { - throw new AssertionError("Should not be called without first overriding!"); - } + abstract public T next(final AbstractFileSystem fs, final Path p) + throws IOException, UnresolvedLinkException; /** * Performs the operation specified by the next function, calling it @@ -104,12 +90,12 @@ public T resolve(final FileContext fc, final Path path) throws IOException { in = next(fs, p); isLink = false; } catch (UnresolvedLinkException e) { - if (count++ > MAX_PATH_LINKS) { + if (count++ > FsConstants.MAX_PATH_LINKS) { throw new IOException("Possible cyclic loop while " + "following symbolic link " + path); } // Resolve the first unresolved path component - p = FSLinkResolver.qualifySymlinkTarget(fs, p, fs.getLinkTarget(p)); + p = qualifySymlinkTarget(fs.getUri(), p, fs.getLinkTarget(p)); fs = fc.getFSofPath(p); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java index 32a58bf83b..3d7b2e50cb 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java @@ -1128,7 +1128,8 @@ public FileStatus next(final AbstractFileSystem fs, final Path p) throws IOException, UnresolvedLinkException { FileStatus fi = fs.getFileLinkStatus(p); if (fi.isSymlink()) { - fi.setSymlink(qualifySymlinkTarget(fs, p, fi.getSymlink())); + fi.setSymlink(FSLinkResolver.qualifySymlinkTarget(fs.getUri(), p, + fi.getSymlink())); } return fi; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java index 19dd348d59..4a8c60e2e0 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java @@ -53,6 +53,7 @@ import org.apache.hadoop.io.MultipleIOException; import org.apache.hadoop.io.Text; import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; @@ -262,6 +263,16 @@ protected int getDefaultPort() { return 0; } + protected static FileSystem getFSofPath(final Path absOrFqPath, + final Configuration conf) + throws UnsupportedFileSystemException, IOException { + absOrFqPath.checkNotSchemeWithRelative(); + absOrFqPath.checkNotRelative(); + + // Uses the default file system if not fully qualified + return get(absOrFqPath.toUri(), conf); + } + /** * Get a canonical service name for this file system. The token cache is * the only user of the canonical service name, and uses it to lookup this @@ -811,7 +822,9 @@ public FSDataOutputStream create(Path f, short replication) public FSDataOutputStream create(Path f, short replication, Progressable progress) throws IOException { return create(f, true, - getConf().getInt("io.file.buffer.size", 4096), + getConf().getInt( + CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY, + CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT), replication, getDefaultBlockSize(f), progress); } @@ -1243,7 +1256,7 @@ public boolean setReplication(Path src, short replication) protected void rename(final Path src, final Path dst, final Rename... options) throws IOException { // Default implementation - final FileStatus srcStatus = getFileStatus(src); + final FileStatus srcStatus = getFileLinkStatus(src); if (srcStatus == null) { throw new FileNotFoundException("rename source " + src + " not found."); } @@ -1259,7 +1272,7 @@ protected void rename(final Path src, final Path dst, FileStatus dstStatus; try { - dstStatus = getFileStatus(dst); + dstStatus = getFileLinkStatus(dst); } catch (IOException e) { dstStatus = null; } @@ -2173,6 +2186,65 @@ public short getDefaultReplication(Path path) { */ public abstract FileStatus getFileStatus(Path f) throws IOException; + /** + * See {@link FileContext#fixRelativePart} + */ + protected Path fixRelativePart(Path p) { + if (p.isUriPathAbsolute()) { + return p; + } else { + return new Path(getWorkingDirectory(), p); + } + } + + /** + * See {@link FileContext#createSymlink(Path, Path, boolean)} + */ + public void createSymlink(final Path target, final Path link, + final boolean createParent) throws AccessControlException, + FileAlreadyExistsException, FileNotFoundException, + ParentNotDirectoryException, UnsupportedFileSystemException, + IOException { + // Supporting filesystems should override this method + throw new UnsupportedOperationException( + "Filesystem does not support symlinks!"); + } + + /** + * See {@link FileContext#getFileLinkStatus(Path)} + */ + public FileStatus getFileLinkStatus(final Path f) + throws AccessControlException, FileNotFoundException, + UnsupportedFileSystemException, IOException { + // Supporting filesystems should override this method + return getFileStatus(f); + } + + /** + * See {@link AbstractFileSystem#supportsSymlinks()} + */ + public boolean supportsSymlinks() { + return false; + } + + /** + * See {@link FileContext#getLinkTarget(Path)} + */ + public Path getLinkTarget(Path f) throws IOException { + // Supporting filesystems should override this method + throw new UnsupportedOperationException( + "Filesystem does not support symlinks!"); + } + + /** + * See {@link AbstractFileSystem#getLinkTarget(Path)} + */ + protected Path resolveLink(Path f) throws IOException { + // Supporting filesystems should override this method + throw new UnsupportedOperationException( + "Filesystem does not support symlinks!"); + } + /** * Get the checksum of a file. * diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemLinkResolver.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemLinkResolver.java new file mode 100644 index 0000000000..4d67b348f6 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemLinkResolver.java @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.io.IOException; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * FileSystem-specific class used to operate on and resolve symlinks in a path. + * Operation can potentially span multiple {@link FileSystem}s. + * + * @see FSLinkResolver + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public abstract class FileSystemLinkResolver { + + /** + * FileSystem subclass-specific implementation of superclass method. + * Overridden on instantiation to perform the actual method call, which throws + * an UnresolvedLinkException if called on an unresolved {@link Path}. + * @param p Path on which to perform an operation + * @return Generic type returned by operation + * @throws IOException + * @throws UnresolvedLinkException + */ + abstract public T doCall(final Path p) throws IOException, + UnresolvedLinkException; + + /** + * Calls the abstract FileSystem call equivalent to the specialized subclass + * implementation in {@link #doCall(Path)}. This is used when retrying the + * call with a newly resolved Path and corresponding new FileSystem. + * + * @param fs + * FileSystem with which to retry call + * @param p + * Resolved Target of path + * @return Generic type determined by implementation + * @throws IOException + */ + abstract public T next(final FileSystem fs, final Path p) throws IOException; + + /** + * Attempt calling overridden {@link #doCall(Path)} method with + * specified {@link FileSystem} and {@link Path}. If the call fails with an + * UnresolvedLinkException, it will try to resolve the path and retry the call + * by calling {@link #next(FileSystem, Path)}. + * @param filesys FileSystem with which to try call + * @param path Path with which to try call + * @return Generic type determined by implementation + * @throws IOException + */ + public T resolve(final FileSystem filesys, final Path path) + throws IOException { + int count = 0; + T in = null; + Path p = path; + FileSystem fs = FileSystem.getFSofPath(p, filesys.getConf()); + for (boolean isLink = true; isLink;) { + try { + in = doCall(p); + isLink = false; + } catch (UnresolvedLinkException e) { + if (count++ > FsConstants.MAX_PATH_LINKS) { + throw new IOException("Possible cyclic loop while " + + "following symbolic link " + path); + } + // Resolve the first unresolved path component + p = FSLinkResolver.qualifySymlinkTarget(fs.getUri(), p, + filesys.resolveLink(p)); + fs = FileSystem.getFSofPath(p, filesys.getConf()); + // Have to call next if it's a new FS + if (!fs.equals(filesys)) { + return next(fs, p); + } + // Else, we keep resolving with this filesystem + } + } + // Successful call, path was fully resolved + return in; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java index d526a78e39..3f7b7ed587 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java @@ -28,6 +28,7 @@ import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.Options.ChecksumOpt; +import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.util.Progressable; /**************************************************************** @@ -397,6 +398,32 @@ public FileStatus getFileStatus(Path f) throws IOException { return fs.getFileStatus(f); } + public void createSymlink(final Path target, final Path link, + final boolean createParent) throws AccessControlException, + FileAlreadyExistsException, FileNotFoundException, + ParentNotDirectoryException, UnsupportedFileSystemException, + IOException { + fs.createSymlink(target, link, createParent); + } + + public FileStatus getFileLinkStatus(final Path f) + throws AccessControlException, FileNotFoundException, + UnsupportedFileSystemException, IOException { + return fs.getFileLinkStatus(f); + } + + public boolean supportsSymlinks() { + return fs.supportsSymlinks(); + } + + public Path getLinkTarget(Path f) throws IOException { + return fs.getLinkTarget(f); + } + + protected Path resolveLink(Path f) throws IOException { + return fs.resolveLink(f); + } + @Override public FileChecksum getFileChecksum(Path f) throws IOException { return fs.getFileChecksum(f); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsConstants.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsConstants.java index 3ebbac220d..cfef1c3827 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsConstants.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsConstants.java @@ -33,8 +33,10 @@ public interface FsConstants { // URI scheme for FTP public static final String FTP_SCHEME = "ftp"; - - + + // Maximum number of symlinks to recursively resolve in a path + static final int MAX_PATH_LINKS = 32; + /** * ViewFs: viewFs file system (ie the mount file system on client side) */ diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemTestWrapper.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemTestWrapper.java index a6bdd38756..d4928381d0 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemTestWrapper.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemTestWrapper.java @@ -133,8 +133,11 @@ public boolean isDir(Path p) throws IOException { } public boolean isSymlink(Path p) throws IOException { - throw new UnsupportedFileSystemException( - "FileSystem does not support symlinks"); + try { + return fs.getFileLinkStatus(p).isSymlink(); + } catch (FileNotFoundException e) { + return false; + } } public void writeFile(Path path, byte b[]) throws IOException { @@ -182,8 +185,16 @@ public void checkFileStatus(String path, fileType expectedType) public void checkFileLinkStatus(String path, fileType expectedType) throws IOException { - throw new UnsupportedFileSystemException( - "FileSystem does not support symlinks"); + FileStatus s = fs.getFileLinkStatus(new Path(path)); + Assert.assertNotNull(s); + if (expectedType == fileType.isDir) { + Assert.assertTrue(s.isDirectory()); + } else if (expectedType == fileType.isFile) { + Assert.assertTrue(s.isFile()); + } else if (expectedType == fileType.isSymlink) { + Assert.assertTrue(s.isSymlink()); + } + Assert.assertEquals(fs.makeQualified(new Path(path)), s.getPath()); } // @@ -215,8 +226,7 @@ public boolean delete(Path f, boolean recursive) @Override public FileStatus getFileLinkStatus(Path f) throws AccessControlException, FileNotFoundException, UnsupportedFileSystemException, IOException { - throw new UnsupportedFileSystemException( - "FileSystem does not support symlinks"); + return fs.getFileLinkStatus(f); } @Override @@ -224,8 +234,7 @@ public void createSymlink(Path target, Path link, boolean createParent) throws AccessControlException, FileAlreadyExistsException, FileNotFoundException, ParentNotDirectoryException, UnsupportedFileSystemException, IOException { - throw new UnsupportedFileSystemException( - "FileSystem does not support symlinks"); + fs.createSymlink(target, link, createParent); } @Override @@ -297,8 +306,7 @@ public FSDataInputStream open(Path f) throws AccessControlException, @Override public Path getLinkTarget(Path f) throws AccessControlException, FileNotFoundException, UnsupportedFileSystemException, IOException { - throw new UnsupportedFileSystemException( - "FileSystem does not support symlinks"); + return fs.getLinkTarget(f); } @Override diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFilterFileSystem.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFilterFileSystem.java index b70f6be53d..ec2908e86c 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFilterFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFilterFileSystem.java @@ -209,6 +209,7 @@ public Token[] addDelegationTokens(String renewer, Credentials creds) public String getScheme() { return "dontcheck"; } + public Path fixRelativePart(Path p) { return null; } } @Test