diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 62141b497f..1e33bde4f7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -499,6 +499,9 @@ Release 2.4.0 - UNRELEASED HDFS-5434. Change block placement policy constructors from package private to protected. (Buddy Taylor via Arpit Agarwal) + HDFS-5788. listLocatedStatus response can be very large. (Nathan Roberts + via kihwal) + OPTIMIZATIONS HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java index 9a907cc948..07d9ce7da8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java @@ -174,7 +174,6 @@ public class FSDirectory implements Closeable { DFSConfigKeys.DFS_LIST_LIMIT, DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT); this.lsLimit = configuredLimit>0 ? 
configuredLimit : DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT; - this.contentCountLimit = conf.getInt( DFSConfigKeys.DFS_CONTENT_SUMMARY_LIMIT_KEY, DFSConfigKeys.DFS_CONTENT_SUMMARY_LIMIT_DEFAULT); @@ -1490,6 +1489,11 @@ public class FSDirectory implements Closeable { /** * Get a partial listing of the indicated directory * + * We will stop when any of the following conditions is met: + * 1) this.lsLimit files have been added + * 2) needLocation is true AND enough files have been added such + * that at least this.lsLimit block locations are in the response + * * @param src the directory name * @param startAfter the name to start listing after * @param needLocation if block locations are returned @@ -1521,14 +1525,30 @@ public class FSDirectory implements Closeable { int startChild = INodeDirectory.nextChild(contents, startAfter); int totalNumChildren = contents.size(); int numOfListing = Math.min(totalNumChildren-startChild, this.lsLimit); + int locationBudget = this.lsLimit; + int listingCnt = 0; HdfsFileStatus listing[] = new HdfsFileStatus[numOfListing]; - for (int i=0; i<numOfListing; i++) { + for (int i=0; i<numOfListing && locationBudget>0; i++) { INode cur = contents.get(startChild+i); listing[i] = createFileStatus(cur.getLocalNameBytes(), cur, needLocation, snapshot); + listingCnt++; + if (needLocation) { + // Once we hit lsLimit locations, stop. + // This helps to prevent excessively large response payloads. + // Approximate #locations with locatedBlockCount() * repl_factor + LocatedBlocks blks = + ((HdfsLocatedFileStatus)listing[i]).getBlockLocations(); + locationBudget -= (blks == null) ? 
0 : + blks.locatedBlockCount() * listing[i].getReplication(); + } + } + // truncate return array if necessary + if (listingCnt < numOfListing) { + listing = Arrays.copyOf(listing, listingCnt); } return new DirectoryListing( - listing, totalNumChildren-startChild-numOfListing); + listing, totalNumChildren-startChild-listingCnt); } finally { readUnlock(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java index c8854c6932..be235e151e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java @@ -26,6 +26,7 @@ import static org.junit.Assert.fail; import java.io.FileNotFoundException; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -952,7 +953,76 @@ public class TestINodeFile { } } } - + @Test + public void testLocationLimitInListingOps() throws Exception { + final Configuration conf = new Configuration(); + conf.setInt(DFSConfigKeys.DFS_LIST_LIMIT, 9); // 3 blocks * 3 replicas + MiniDFSCluster cluster = null; + try { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); + cluster.waitActive(); + final DistributedFileSystem hdfs = cluster.getFileSystem(); + ArrayList source = new ArrayList(); + + // tmp1 holds files with 3 blocks, 3 replicas + // tmp2 holds files with 3 blocks, 1 replica + hdfs.mkdirs(new Path("/tmp1")); + hdfs.mkdirs(new Path("/tmp2")); + + source.add("f1"); + source.add("f2"); + + int numEntries = source.size(); + for (int j=0;j