From d0ba178800d04e6e9842a1823b2205732608c0b5 Mon Sep 17 00:00:00 2001 From: Hairong Kuang Date: Fri, 30 Jul 2010 20:52:08 +0000 Subject: [PATCH] HADOOP-6890. Improve listFiles API introduced by HADOOP-6870. Contributed by Hairong Kuang. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@980953 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 1 + .../apache/hadoop/fs/AbstractFileSystem.java | 19 +- .../org/apache/hadoop/fs/FileContext.java | 213 ++++++++++-------- src/java/org/apache/hadoop/fs/FileSystem.java | 29 ++- .../org/apache/hadoop/fs/TestListFiles.java | 7 +- 5 files changed, 168 insertions(+), 101 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 261222f97c..da2b5a2525 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -101,6 +101,7 @@ Trunk (unreleased changes) HADOOP-6656. Adds a thread in the UserGroupInformation to renew TGTs periodically. (Owen O'Malley and ddas via ddas) + HADOOP-6890. Improve listFiles API introduced by HADOOP-6870. (hairong) OPTIMIZATIONS BUG FIXES diff --git a/src/java/org/apache/hadoop/fs/AbstractFileSystem.java b/src/java/org/apache/hadoop/fs/AbstractFileSystem.java index c01946bfae..9da59eed5e 100644 --- a/src/java/org/apache/hadoop/fs/AbstractFileSystem.java +++ b/src/java/org/apache/hadoop/fs/AbstractFileSystem.java @@ -786,8 +786,8 @@ public void remove() { /** * The specification of this method matches that of - * {@link FileContext#listLocatedStatus(Path)} except that Path f must be for this - * file system. + * {@link FileContext#listLocatedStatus(Path)} except that Path f + * must be for this file system. 
*/ protected Iterator listLocatedStatus(final Path f) throws AccessControlException, FileNotFoundException, @@ -795,15 +795,28 @@ protected Iterator listLocatedStatus(final Path f) return new Iterator() { private Iterator itor = listStatusIterator(f); + /** + * {@inheritDoc} + * @return {@inheritDoc} + * @throws RuntimeException if any IOException occurs during traversal; + * the IOException is set as the cause of the RuntimeException + */ @Override public boolean hasNext() { return itor.hasNext(); } + /** + * {@inheritDoc} + * @return {@inheritDoc} + * @throws RuntimeException if any IOException occurs during traversal; + * the IOException is set as the cause of the RuntimeException + * @exception {@inheritDoc} + */ @Override public LocatedFileStatus next() { if (!hasNext()) { - throw new NoSuchElementException(); + throw new NoSuchElementException("No more entry in " + f); } FileStatus result = itor.next(); try { diff --git a/src/java/org/apache/hadoop/fs/FileContext.java b/src/java/org/apache/hadoop/fs/FileContext.java index 5daa2f5a15..279d6825f3 100644 --- a/src/java/org/apache/hadoop/fs/FileContext.java +++ b/src/java/org/apache/hadoop/fs/FileContext.java @@ -1286,103 +1286,21 @@ public Iterator next(final AbstractFileSystem fs, final Path p) } }.resolve(this, absF); } - - /** - * List the statuses and block locations of the files in the given path - * if the path is a directory. - * If the given path is a file, return the file's status and block locations. - * if recursive is true, list all file statuses and block locations in - * the subtree rooted at the given path. - * Files across symbolic links are also returned. 
- * - * @param f is the path - * @param recursive if the subdirectories need to be traversed recursively - * - * @return an iterator that traverses statuses of the files - * - * @throws AccessControlException If access is denied - * @throws FileNotFoundException If f does not exist - * @throws UnsupportedFileSystemException If file system for f is - * not supported - * @throws IOException If an I/O error occurred - * - * Exceptions applicable to file systems accessed over RPC: - * @throws RpcClientException If an exception occurred in the RPC client - * @throws RpcServerException If an exception occurred in the RPC server - * @throws UnexpectedServerException If server implementation throws - * undeclared exception to RPC server - */ - public Iterator listFiles( - final Path f, final boolean recursive) throws AccessControlException, - FileNotFoundException, UnsupportedFileSystemException, - IOException { - return new Iterator() { - private Stack dirs = new Stack(); - private Stack symLinks = new Stack(); - Iterator itor = listLocatedStatus(f); - LocatedFileStatus curFile; - - @Override - public boolean hasNext() { - try { - while (curFile == null) { - if (itor.hasNext()) { - handleFileStat(itor.next()); - } else if (!dirs.isEmpty()) { - Path dirPath = dirs.pop(); - itor = listLocatedStatus(dirPath); - } else if (!symLinks.isEmpty()) { - Path symLink = symLinks.pop(); - FileStatus stat = getFileStatus(symLink); - if (stat.isFile() || (recursive && stat.isDirectory())) { - itor = listLocatedStatus(stat.getPath()); - } - } else { - return false; - } - } - return true; - } catch (IOException ioe) { - throw (RuntimeException)new RuntimeException().initCause(ioe); - } - } - - private void handleFileStat(LocatedFileStatus stat) throws IOException { - if (stat.isFile()) { // file - curFile = stat; - } else if (stat.isSymlink()) { // symbolic link - symLinks.push(stat.getSymlink()); - } else if (recursive) { // directory - dirs.push(stat.getPath()); - } - } - - @Override - 
public LocatedFileStatus next() { - if (hasNext()) { - LocatedFileStatus result = curFile; - curFile = null; - return result; - } - throw new java.util.NoSuchElementException("No more entry in " + f); - } - - @Override - public void remove() { - throw new UnsupportedOperationException("Remove is not supported"); - - } - }; - } /** * List the statuses of the files/directories in the given path if the path is - * a directory. Each returned status contains a file's block locations. + * a directory. + * Return the file's status and block locations If the path is a file. + * + * If a returned status is a file, it contains the file's block locations. * * @param f is the path * * @return an iterator that traverses statuses of the files/directories * in the given path + * If any IO exception (for example the input directory gets deleted while + * listing is being executed), next() or hasNext() of the returned iterator + * may throw a RuntimeException with the io exception as the cause. * * @throws AccessControlException If access is denied * @throws FileNotFoundException If f does not exist @@ -1678,6 +1596,123 @@ public FileStatus[] next(final AbstractFileSystem fs, final Path p) }.resolve(FileContext.this, absF); } + /** + * List the statuses and block locations of the files in the given path. + * + * If the path is a directory, + * if recursive is false, returns files in the directory; + * if recursive is true, return files in the subtree rooted at the path. + * The subtree is traversed in the depth-first order. + * If the path is a file, return the file's status and block locations. + * Files across symbolic links are also returned. 
+ * + * @param f is the path + * @param recursive if the subdirectories need to be traversed recursively + * + * @return an iterator that traverses statuses of the files + * If any IO exception (for example a sub-directory gets deleted while + * listing is being executed), next() or hasNext() of the returned iterator + * may throw a RuntimeException with the IO exception as the cause. + * + * @throws AccessControlException If access is denied + * @throws FileNotFoundException If f does not exist + * @throws UnsupportedFileSystemException If file system for f + * is not supported + * @throws IOException If an I/O error occurred + * + * Exceptions applicable to file systems accessed over RPC: + * @throws RpcClientException If an exception occurred in the RPC client + * @throws RpcServerException If an exception occurred in the RPC server + * @throws UnexpectedServerException If server implementation throws + * undeclared exception to RPC server + */ + public Iterator listFiles( + final Path f, final boolean recursive) throws AccessControlException, + FileNotFoundException, UnsupportedFileSystemException, + IOException { + return new Iterator() { + private Stack> itors = + new Stack>(); + Iterator curItor = listLocatedStatus(f); + LocatedFileStatus curFile; + + /** + * {@inheritDoc} + * @return {@inheritDoc} + * @throws RuntimeException if any IOException occurs during traversal; + * the IOException is set as the cause of the RuntimeException + */ + @Override + public boolean hasNext() { + while (curFile == null) { + if (curItor.hasNext()) { + handleFileStat(curItor.next()); + } else if (!itors.empty()) { + curItor = itors.pop(); + } else { + return false; + } + } + return true; + } + + /** + * Process the input stat. + * If it is a file, return the file stat. + * If it is a directory, traverse the directory if recursive is true; + * ignore it if recursive is false. 
+ * If it is a symlink, resolve the symlink first and then process it + * depending on if it is a file or directory. + * @param stat input status + * @throws RuntimeException if any io error occurs; the io exception + * is set as the cause of RuntimeException + */ + private void handleFileStat(LocatedFileStatus stat) { + try { + if (stat.isFile()) { // file + curFile = stat; + } else if (stat.isSymlink()) { // symbolic link + // resolve symbolic link + FileStatus symstat = FileContext.this.getFileStatus( + stat.getSymlink()); + if (symstat.isFile() || (recursive && symstat.isDirectory())) { + itors.push(curItor); + curItor = listLocatedStatus(stat.getPath()); + } + } else if (recursive) { // directory + itors.push(curItor); + curItor = listLocatedStatus(stat.getPath()); + } + } catch (IOException ioe) { + throw (RuntimeException)new RuntimeException().initCause(ioe); + } + } + + /** + * {@inheritDoc} + * @return {@inheritDoc} + * @throws RuntimeException if any IOException occurs during traversal; + * the IOException is set as the cause of the RuntimeException + * @exception {@inheritDoc} + */ + @Override + public LocatedFileStatus next() { + if (hasNext()) { + LocatedFileStatus result = curFile; + curFile = null; + return result; + } + throw new java.util.NoSuchElementException("No more entry in " + f); + } + + @Override + public void remove() { + throw new UnsupportedOperationException("Remove is not supported"); + + } + }; + } + /** + *

Return all the files that match filePattern and are not checksum * files. Results are sorted by their names. diff --git a/src/java/org/apache/hadoop/fs/FileSystem.java b/src/java/org/apache/hadoop/fs/FileSystem.java index 812195cf34..e7b93d6c71 100644 --- a/src/java/org/apache/hadoop/fs/FileSystem.java +++ b/src/java/org/apache/hadoop/fs/FileSystem.java @@ -1320,16 +1320,22 @@ private Path[] globPathsLevel(Path[] parents, String[] filePattern, } /** - * List the statuses and block locations of the files in the given path - * if the path is a directory. - * If the given path is a file, return the file's status and block locations. - * if recursive is true, list all file statuses and block locations in - * the subtree rooted at the given path. + * List the statuses and block locations of the files in the given path. + * + * If the path is a directory, + * if recursive is false, returns files in the directory; + * if recursive is true, return files in the subtree rooted at the path. + * If the path is a file, return the file's status and block locations. + * Files across symbolic links are also returned. * * @param f is the path * @param recursive if the subdirectories need to be traversed recursively * * @return an iterator that traverses statuses of the files + * If any IO exception (for example a sub-directory gets deleted while + * listing is being executed), next() or hasNext() of the returned iterator + * may throw a RuntimeException with the IO exception as the cause. 
+ * + * @throws FileNotFoundException when the path does not exist; * IOException see specific implementation */ @@ -1344,6 +1350,12 @@ public Iterator listFiles( list(f); } + /** + * {@inheritDoc} + * @return {@inheritDoc} + * @throws RuntimeException if any IOException occurs during traversal; + * the IOException is set as the cause of the RuntimeException + */ @Override public boolean hasNext() { if (fileStats.isEmpty()) { @@ -1382,6 +1394,13 @@ private void list(Path dirPath) { } } + /** + * {@inheritDoc} + * @return {@inheritDoc} + * @throws RuntimeException if any IOException occurs during traversal; + * the IOException is set as the cause of the RuntimeException + * @exception {@inheritDoc} + */ @Override public LocatedFileStatus next() { if (!hasNext()) { diff --git a/src/test/core/org/apache/hadoop/fs/TestListFiles.java b/src/test/core/org/apache/hadoop/fs/TestListFiles.java index c677ca1d5e..5ec81e2a2b 100644 --- a/src/test/core/org/apache/hadoop/fs/TestListFiles.java +++ b/src/test/core/org/apache/hadoop/fs/TestListFiles.java @@ -106,15 +106,15 @@ public void testFile() throws IOException { public void testDirectory() throws IOException { fs.mkdirs(DIR1); + // test empty directory Iterator itor = fs.listFiles( DIR1, true); assertFalse(itor.hasNext()); itor = fs.listFiles(DIR1, false); assertFalse(itor.hasNext()); - writeFile(fs, FILE2, FILE_LEN); - - // test empty directory + // testing directory with 1 file + writeFile(fs, FILE2, FILE_LEN); itor = fs.listFiles(DIR1, true); LocatedFileStatus stat = itor.next(); assertFalse(itor.hasNext()); @@ -123,7 +123,6 @@ public void testDirectory() throws IOException { assertEquals(fs.makeQualified(FILE2), stat.getPath()); assertEquals(1, stat.getBlockLocations().length); - // testing directory with 1 file itor = fs.listFiles(DIR1, false); stat = itor.next(); assertFalse(itor.hasNext());