HADOOP-11785. Reduce the number of listStatus operations in distcp buildListing (Zoran Dimitrijevic via Colin P. McCabe)
This commit is contained in:
parent
db80e42891
commit
932730df7d
@ -483,6 +483,9 @@ Release 2.8.0 - UNRELEASED
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
HADOOP-11785. Reduce the number of listStatus operations in distcp
|
||||
buildListing (Zoran Dimitrijevic via Colin P. McCabe)
|
||||
|
||||
BUG FIXES
|
||||
|
||||
HADOOP-10027. *Compressor_deflateBytesDirect passes instance instead of
|
||||
|
@ -193,12 +193,12 @@ public void doBuildListing(SequenceFile.Writer fileListWriter,
|
||||
writeToFileListing(fileListWriter, sourceCopyListingStatus,
|
||||
sourcePathRoot, options);
|
||||
|
||||
if (isDirectoryAndNotEmpty(sourceFS, sourceStatus)) {
|
||||
if (sourceStatus.isDirectory()) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Traversing non-empty source dir: " + sourceStatus.getPath());
|
||||
LOG.debug("Traversing source dir: " + sourceStatus.getPath());
|
||||
}
|
||||
traverseNonEmptyDirectory(fileListWriter, sourceStatus, sourcePathRoot,
|
||||
options);
|
||||
traverseDirectory(fileListWriter, sourceFS, sourceStatus,
|
||||
sourcePathRoot, options);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -275,22 +275,17 @@ private SequenceFile.Writer getWriter(Path pathToListFile) throws IOException {
|
||||
SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE));
|
||||
}
|
||||
|
||||
private static boolean isDirectoryAndNotEmpty(FileSystem fileSystem,
|
||||
FileStatus fileStatus) throws IOException {
|
||||
return fileStatus.isDirectory() && getChildren(fileSystem, fileStatus).length > 0;
|
||||
}
|
||||
|
||||
private static FileStatus[] getChildren(FileSystem fileSystem,
|
||||
FileStatus parent) throws IOException {
|
||||
return fileSystem.listStatus(parent.getPath());
|
||||
}
|
||||
|
||||
private void traverseNonEmptyDirectory(SequenceFile.Writer fileListWriter,
|
||||
FileStatus sourceStatus,
|
||||
Path sourcePathRoot,
|
||||
DistCpOptions options)
|
||||
throws IOException {
|
||||
FileSystem sourceFS = sourcePathRoot.getFileSystem(getConf());
|
||||
private void traverseDirectory(SequenceFile.Writer fileListWriter,
|
||||
FileSystem sourceFS,
|
||||
FileStatus sourceStatus,
|
||||
Path sourcePathRoot,
|
||||
DistCpOptions options)
|
||||
throws IOException {
|
||||
final boolean preserveAcls = options.shouldPreserve(FileAttribute.ACL);
|
||||
final boolean preserveXAttrs = options.shouldPreserve(FileAttribute.XATTR);
|
||||
final boolean preserveRawXattrs = options.shouldPreserveRawXattrs();
|
||||
@ -299,9 +294,9 @@ private void traverseNonEmptyDirectory(SequenceFile.Writer fileListWriter,
|
||||
|
||||
while (!pathStack.isEmpty()) {
|
||||
for (FileStatus child: getChildren(sourceFS, pathStack.pop())) {
|
||||
if (LOG.isDebugEnabled())
|
||||
LOG.debug("Recording source-path: "
|
||||
+ sourceStatus.getPath() + " for copy.");
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Recording source-path: " + child.getPath() + " for copy.");
|
||||
}
|
||||
CopyListingFileStatus childCopyListingStatus =
|
||||
DistCpUtils.toCopyListingFileStatus(sourceFS, child,
|
||||
preserveAcls && child.isDirectory(),
|
||||
@ -309,16 +304,16 @@ private void traverseNonEmptyDirectory(SequenceFile.Writer fileListWriter,
|
||||
preserveRawXattrs && child.isDirectory());
|
||||
writeToFileListing(fileListWriter, childCopyListingStatus,
|
||||
sourcePathRoot, options);
|
||||
if (isDirectoryAndNotEmpty(sourceFS, child)) {
|
||||
if (LOG.isDebugEnabled())
|
||||
LOG.debug("Traversing non-empty source dir: "
|
||||
+ sourceStatus.getPath());
|
||||
if (child.isDirectory()) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Traversing into source dir: " + child.getPath());
|
||||
}
|
||||
pathStack.push(child);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void writeToFileListingRoot(SequenceFile.Writer fileListWriter,
|
||||
CopyListingFileStatus fileStatus, Path sourcePathRoot,
|
||||
DistCpOptions options) throws IOException {
|
||||
|
Loading…
Reference in New Issue
Block a user