HADOOP-11785. Reduce the number of listStatus operation in distcp buildListing (Zoran Dimitrijevic via Colin P. McCabe)

This commit is contained in:
Colin Patrick Mccabe 2015-04-03 14:08:25 -07:00
parent db80e42891
commit 932730df7d
2 changed files with 21 additions and 23 deletions

View File

@ -483,6 +483,9 @@ Release 2.8.0 - UNRELEASED
OPTIMIZATIONS
HADOOP-11785. Reduce the number of listStatus operation in distcp
buildListing (Zoran Dimitrijevic via Colin P. McCabe)
BUG FIXES
HADOOP-10027. *Compressor_deflateBytesDirect passes instance instead of

View File

@ -193,12 +193,12 @@ public void doBuildListing(SequenceFile.Writer fileListWriter,
writeToFileListing(fileListWriter, sourceCopyListingStatus,
sourcePathRoot, options);
if (isDirectoryAndNotEmpty(sourceFS, sourceStatus)) {
if (sourceStatus.isDirectory()) {
if (LOG.isDebugEnabled()) {
LOG.debug("Traversing non-empty source dir: " + sourceStatus.getPath());
LOG.debug("Traversing source dir: " + sourceStatus.getPath());
}
traverseNonEmptyDirectory(fileListWriter, sourceStatus, sourcePathRoot,
options);
traverseDirectory(fileListWriter, sourceFS, sourceStatus,
sourcePathRoot, options);
}
}
}
@ -275,22 +275,17 @@ private SequenceFile.Writer getWriter(Path pathToListFile) throws IOException {
SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE));
}
private static boolean isDirectoryAndNotEmpty(FileSystem fileSystem,
FileStatus fileStatus) throws IOException {
return fileStatus.isDirectory() && getChildren(fileSystem, fileStatus).length > 0;
}
private static FileStatus[] getChildren(FileSystem fileSystem,
FileStatus parent) throws IOException {
return fileSystem.listStatus(parent.getPath());
}
private void traverseNonEmptyDirectory(SequenceFile.Writer fileListWriter,
FileStatus sourceStatus,
Path sourcePathRoot,
DistCpOptions options)
throws IOException {
FileSystem sourceFS = sourcePathRoot.getFileSystem(getConf());
private void traverseDirectory(SequenceFile.Writer fileListWriter,
FileSystem sourceFS,
FileStatus sourceStatus,
Path sourcePathRoot,
DistCpOptions options)
throws IOException {
final boolean preserveAcls = options.shouldPreserve(FileAttribute.ACL);
final boolean preserveXAttrs = options.shouldPreserve(FileAttribute.XATTR);
final boolean preserveRawXattrs = options.shouldPreserveRawXattrs();
@ -299,9 +294,9 @@ private void traverseNonEmptyDirectory(SequenceFile.Writer fileListWriter,
while (!pathStack.isEmpty()) {
for (FileStatus child: getChildren(sourceFS, pathStack.pop())) {
if (LOG.isDebugEnabled())
LOG.debug("Recording source-path: "
+ sourceStatus.getPath() + " for copy.");
if (LOG.isDebugEnabled()) {
LOG.debug("Recording source-path: " + child.getPath() + " for copy.");
}
CopyListingFileStatus childCopyListingStatus =
DistCpUtils.toCopyListingFileStatus(sourceFS, child,
preserveAcls && child.isDirectory(),
@ -309,16 +304,16 @@ private void traverseNonEmptyDirectory(SequenceFile.Writer fileListWriter,
preserveRawXattrs && child.isDirectory());
writeToFileListing(fileListWriter, childCopyListingStatus,
sourcePathRoot, options);
if (isDirectoryAndNotEmpty(sourceFS, child)) {
if (LOG.isDebugEnabled())
LOG.debug("Traversing non-empty source dir: "
+ sourceStatus.getPath());
if (child.isDirectory()) {
if (LOG.isDebugEnabled()) {
LOG.debug("Traversing into source dir: " + child.getPath());
}
pathStack.push(child);
}
}
}
}
private void writeToFileListingRoot(SequenceFile.Writer fileListWriter,
CopyListingFileStatus fileStatus, Path sourcePathRoot,
DistCpOptions options) throws IOException {