diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 6a6fcb5c05..02ed03d557 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -2485,6 +2485,8 @@ Release 0.23.7 - UNRELEASED HDFS-4581. checkDiskError should not be called on network errors (Rohit Kochar via kihwal) + HDFS-4649. Webhdfs cannot list large directories (daryn via kihwal) + Release 0.23.6 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java index f88f085b3e..288455bd2b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java @@ -718,9 +718,15 @@ private static DirectoryListing getDirectoryListing(final NamenodeProtocols np, private static StreamingOutput getListingStream(final NamenodeProtocols np, final String p) throws IOException { - final DirectoryListing first = getDirectoryListing(np, p, + // allows exceptions like FNF or ACE to prevent http response of 200 for + // a failure since we can't (currently) return error responses in the + // middle of a streaming operation + final DirectoryListing firstDirList = getDirectoryListing(np, p, HdfsFileStatus.EMPTY_NAME); + // must save ugi because the streaming object will be executed outside + // the remote user's ugi + final UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); return new StreamingOutput() { @Override public void write(final OutputStream outstream) throws IOException { @@ -729,21 +735,32 @@ public void write(final OutputStream outstream) throws IOException { out.println("{\"" + FileStatus.class.getSimpleName() + "es\":{\"" + FileStatus.class.getSimpleName() + "\":["); - final HdfsFileStatus[] partial = first.getPartialListing(); - if (partial.length > 0) { - out.print(JsonUtil.toJsonString(partial[0], false)); - } - for(int i = 1; i < partial.length; i++) { - out.println(','); - out.print(JsonUtil.toJsonString(partial[i], false)); - } - - for(DirectoryListing curr = first; curr.hasMore(); ) { - curr = getDirectoryListing(np, p, curr.getLastName()); - for(HdfsFileStatus s : curr.getPartialListing()) { - out.println(','); - out.print(JsonUtil.toJsonString(s, false)); - } + try { + // restore remote user's ugi + ugi.doAs(new PrivilegedExceptionAction() { + @Override + public Void run() throws IOException { + long n = 0; + for (DirectoryListing dirList = firstDirList; ; + dirList = getDirectoryListing(np, p, dirList.getLastName()) + ) { + // send each segment of the directory listing + for (HdfsFileStatus s : dirList.getPartialListing()) { + if (n++ > 0) { + out.println(','); + } + out.print(JsonUtil.toJsonString(s, false)); + } + // stop if last segment + if (!dirList.hasMore()) { + break; + } + } + return null; + } + }); + } catch (InterruptedException e) { + throw new IOException(e); } out.println(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java index 969ad5c1fb..3998fdc3c9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hdfs.web; import java.io.IOException; +import java.net.URISyntaxException; +import java.security.PrivilegedExceptionAction; import java.util.Random; import org.apache.commons.logging.Log; @@ -29,9 +31,13 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.TestDFSClientRetries; import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.log4j.Level; import org.junit.Assert; import org.junit.Test; @@ -208,4 +214,48 @@ public void testNamenodeRestart() throws Exception { final Configuration conf = WebHdfsTestUtil.createConf(); TestDFSClientRetries.namenodeRestartTest(conf, true); } + + @Test(timeout=300000) + public void testLargeDirectory() throws Exception { + final Configuration conf = WebHdfsTestUtil.createConf(); + final int listLimit = 2; + // force small chunking of directory listing + conf.setInt(DFSConfigKeys.DFS_LIST_LIMIT, listLimit); + // force paths to be only owner-accessible to ensure ugi isn't changing + // during listStatus + FsPermission.setUMask(conf, new FsPermission((short)0077)); + + final MiniDFSCluster cluster = + new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); + try { + cluster.waitActive(); + WebHdfsTestUtil.getWebHdfsFileSystem(conf).setPermission( + new Path("/"), + new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL)); + + // trick the NN into not believing it's not the superuser so we can + // tell if the correct user is used by listStatus + UserGroupInformation.setLoginUser( + UserGroupInformation.createUserForTesting( + "not-superuser", new String[]{"not-supergroup"})); + + UserGroupInformation.createUserForTesting("me", new String[]{"my-group"}) + .doAs(new PrivilegedExceptionAction() { + @Override + public Void run() throws IOException, URISyntaxException { + FileSystem fs = WebHdfsTestUtil.getWebHdfsFileSystem(conf); + Path d = new Path("/my-dir"); + Assert.assertTrue(fs.mkdirs(d)); + for (int i=0; i < listLimit*3; i++) { + Path p = new Path(d, "file-"+i); + Assert.assertTrue(fs.createNewFile(p)); + } + Assert.assertEquals(listLimit*3, fs.listStatus(d).length); + return null; + } + }); + } finally { + cluster.shutdown(); + } + } }