HADOOP-16775. DistCp reuses the same temp file within the task for different files.
Contributed by Amir Shenavandeh. This avoids overwrite consistency issues with S3 and other stores - though given S3's copy operation is O(data), you are still best off using -direct when distcp-ing to it. Change-Id: I8dc9f048ad0cc57ff01543b849da1ce4eaadf8c3
This commit is contained in:
parent
eca7e14c2f
commit
b6dc00f481
@@ -230,7 +230,8 @@ private Path getTempFile(Path target, Mapper.Context context) {
|
|||||||
Path root = target.equals(targetWorkPath) ? targetWorkPath.getParent()
|
Path root = target.equals(targetWorkPath) ? targetWorkPath.getParent()
|
||||||
: targetWorkPath;
|
: targetWorkPath;
|
||||||
Path tempFile = new Path(root, ".distcp.tmp." +
|
Path tempFile = new Path(root, ".distcp.tmp." +
|
||||||
context.getTaskAttemptID().toString());
|
context.getTaskAttemptID().toString() +
|
||||||
|
"." + String.valueOf(System.currentTimeMillis()));
|
||||||
LOG.info("Creating temp file: {}", tempFile);
|
LOG.info("Creating temp file: {}", tempFile);
|
||||||
return tempFile;
|
return tempFile;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user