HADOOP-11009. Add Timestamp Preservation to DistCp (Gary Steelman via aw)
This commit is contained in:
parent
cbf0ae742a
commit
3cde37c991
@ -543,6 +543,8 @@ Release 2.6.0 - UNRELEASED
|
|||||||
HADOOP-11017. KMS delegation token secret manager should be able to use
|
HADOOP-11017. KMS delegation token secret manager should be able to use
|
||||||
zookeeper as store. (asuresh via tucu)
|
zookeeper as store. (asuresh via tucu)
|
||||||
|
|
||||||
|
HADOOP-11009. Add Timestamp Preservation to DistCp (Gary Steelman via aw)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
HADOOP-10838. Byte array native checksumming. (James Thomas via todd)
|
HADOOP-10838. Byte array native checksumming. (James Thomas via todd)
|
||||||
|
@ -101,7 +101,7 @@ public void setAclEntries(List<AclEntry> aclEntries) {
|
|||||||
* @return Map<String, byte[]> containing all xAttrs
|
* @return Map<String, byte[]> containing all xAttrs
|
||||||
*/
|
*/
|
||||||
public Map<String, byte[]> getXAttrs() {
|
public Map<String, byte[]> getXAttrs() {
|
||||||
return xAttrs;
|
return xAttrs != null ? xAttrs : Collections.<String, byte[]>emptyMap();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -37,18 +37,21 @@ public enum DistCpOptionSwitch {
|
|||||||
/**
|
/**
|
||||||
* Preserves status of file/path in the target.
|
* Preserves status of file/path in the target.
|
||||||
* Default behavior with -p, is to preserve replication,
|
* Default behavior with -p, is to preserve replication,
|
||||||
* block size, user, group, permission and checksum type on the target file.
|
* block size, user, group, permission, checksum type and timestamps on the
|
||||||
* Note that when preserving checksum type, block size is also preserved.
|
* target file. Note that when preserving checksum type, block size is also
|
||||||
|
* preserved.
|
||||||
*
|
*
|
||||||
* If any of the optional switches are present among rbugpc, then
|
* @see PRESERVE_STATUS_DEFAULT
|
||||||
|
*
|
||||||
|
* If any of the optional switches are present among rbugpcaxt, then
|
||||||
* only the corresponding file attribute is preserved.
|
* only the corresponding file attribute is preserved.
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
PRESERVE_STATUS(DistCpConstants.CONF_LABEL_PRESERVE_STATUS,
|
PRESERVE_STATUS(DistCpConstants.CONF_LABEL_PRESERVE_STATUS,
|
||||||
new Option("p", true, "preserve status (rbugpcax)(replication, " +
|
new Option("p", true, "preserve status (rbugpcaxt)(replication, " +
|
||||||
"block-size, user, group, permission, checksum-type, ACL, XATTR). " +
|
"block-size, user, group, permission, checksum-type, ACL, XATTR, " +
|
||||||
"If -p is specified with no <arg>, then preserves replication, " +
|
"timestamps). If -p is specified with no <arg>, then preserves " +
|
||||||
"block size, user, group, permission and checksum type." +
|
"replication, block size, user, group, permission, checksum type " +
|
||||||
|
"and timestamps. " +
|
||||||
"raw.* xattrs are preserved when both the source and destination " +
|
"raw.* xattrs are preserved when both the source and destination " +
|
||||||
"paths are in the /.reserved/raw hierarchy (HDFS only). raw.* xattr" +
|
"paths are in the /.reserved/raw hierarchy (HDFS only). raw.* xattr" +
|
||||||
"preservation is independent of the -p flag." +
|
"preservation is independent of the -p flag." +
|
||||||
@ -166,7 +169,7 @@ public enum DistCpOptionSwitch {
|
|||||||
BANDWIDTH(DistCpConstants.CONF_LABEL_BANDWIDTH_MB,
|
BANDWIDTH(DistCpConstants.CONF_LABEL_BANDWIDTH_MB,
|
||||||
new Option("bandwidth", true, "Specify bandwidth per map in MB"));
|
new Option("bandwidth", true, "Specify bandwidth per map in MB"));
|
||||||
|
|
||||||
static final String PRESERVE_STATUS_DEFAULT = "-prbugpc";
|
public static final String PRESERVE_STATUS_DEFAULT = "-prbugpct";
|
||||||
private final String confLabel;
|
private final String confLabel;
|
||||||
private final Option option;
|
private final Option option;
|
||||||
|
|
||||||
|
@ -68,7 +68,7 @@ public class DistCpOptions {
|
|||||||
private boolean targetPathExists = true;
|
private boolean targetPathExists = true;
|
||||||
|
|
||||||
public static enum FileAttribute{
|
public static enum FileAttribute{
|
||||||
REPLICATION, BLOCKSIZE, USER, GROUP, PERMISSION, CHECKSUMTYPE, ACL, XATTR;
|
REPLICATION, BLOCKSIZE, USER, GROUP, PERMISSION, CHECKSUMTYPE, ACL, XATTR, TIMES;
|
||||||
|
|
||||||
public static FileAttribute getAttribute(char symbol) {
|
public static FileAttribute getAttribute(char symbol) {
|
||||||
for (FileAttribute attribute : values()) {
|
for (FileAttribute attribute : values()) {
|
||||||
|
@ -18,39 +18,39 @@
|
|||||||
|
|
||||||
package org.apache.hadoop.tools.util;
|
package org.apache.hadoop.tools.util;
|
||||||
|
|
||||||
import com.google.common.collect.Maps;
|
import java.io.IOException;
|
||||||
|
import java.net.InetAddress;
|
||||||
|
import java.net.URI;
|
||||||
|
import java.net.UnknownHostException;
|
||||||
|
import java.text.DecimalFormat;
|
||||||
|
import java.util.EnumSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Map.Entry;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileChecksum;
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.fs.FileChecksum;
|
|
||||||
import org.apache.hadoop.fs.XAttr;
|
import org.apache.hadoop.fs.XAttr;
|
||||||
import org.apache.hadoop.fs.permission.AclEntry;
|
import org.apache.hadoop.fs.permission.AclEntry;
|
||||||
import org.apache.hadoop.fs.permission.AclUtil;
|
import org.apache.hadoop.fs.permission.AclUtil;
|
||||||
import org.apache.hadoop.fs.permission.FsPermission;
|
import org.apache.hadoop.fs.permission.FsPermission;
|
||||||
import org.apache.hadoop.io.SequenceFile;
|
import org.apache.hadoop.io.SequenceFile;
|
||||||
import org.apache.hadoop.io.Text;
|
import org.apache.hadoop.io.Text;
|
||||||
|
import org.apache.hadoop.mapreduce.InputFormat;
|
||||||
|
import org.apache.hadoop.tools.CopyListing.AclsNotSupportedException;
|
||||||
import org.apache.hadoop.tools.CopyListing.XAttrsNotSupportedException;
|
import org.apache.hadoop.tools.CopyListing.XAttrsNotSupportedException;
|
||||||
import org.apache.hadoop.tools.CopyListingFileStatus;
|
import org.apache.hadoop.tools.CopyListingFileStatus;
|
||||||
|
import org.apache.hadoop.tools.DistCpOptions;
|
||||||
import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
|
import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
|
||||||
import org.apache.hadoop.tools.mapred.UniformSizeInputFormat;
|
import org.apache.hadoop.tools.mapred.UniformSizeInputFormat;
|
||||||
import org.apache.hadoop.tools.CopyListing.AclsNotSupportedException;
|
|
||||||
import org.apache.hadoop.tools.DistCpOptions;
|
|
||||||
import org.apache.hadoop.mapreduce.InputFormat;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import com.google.common.collect.Maps;
|
||||||
import java.util.EnumSet;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Locale;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Map.Entry;
|
|
||||||
import java.text.DecimalFormat;
|
|
||||||
import java.net.URI;
|
|
||||||
import java.net.InetAddress;
|
|
||||||
import java.net.UnknownHostException;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utility functions used in DistCp.
|
* Utility functions used in DistCp.
|
||||||
@ -163,7 +163,7 @@ public static String packAttributes(EnumSet<FileAttribute> attributes) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Un packs preservation attribute string containing the first character of
|
* Unpacks preservation attribute string containing the first character of
|
||||||
* each preservation attribute back to a set of attributes to preserve
|
* each preservation attribute back to a set of attributes to preserve
|
||||||
* @param attributes - Attribute string
|
* @param attributes - Attribute string
|
||||||
* @return - Attribute set
|
* @return - Attribute set
|
||||||
@ -209,7 +209,7 @@ public static void preserve(FileSystem targetFS, Path path,
|
|||||||
if (!srcAcl.equals(targetAcl)) {
|
if (!srcAcl.equals(targetAcl)) {
|
||||||
targetFS.setAcl(path, srcAcl);
|
targetFS.setAcl(path, srcAcl);
|
||||||
}
|
}
|
||||||
// setAcl can't preserve sticky bit, so also call setPermission if needed.
|
// setAcl doesn't preserve sticky bit, so also call setPermission if needed.
|
||||||
if (srcFileStatus.getPermission().getStickyBit() !=
|
if (srcFileStatus.getPermission().getStickyBit() !=
|
||||||
targetFileStatus.getPermission().getStickyBit()) {
|
targetFileStatus.getPermission().getStickyBit()) {
|
||||||
targetFS.setPermission(path, srcFileStatus.getPermission());
|
targetFS.setPermission(path, srcFileStatus.getPermission());
|
||||||
@ -225,30 +225,28 @@ public static void preserve(FileSystem targetFS, Path path,
|
|||||||
Map<String, byte[]> srcXAttrs = srcFileStatus.getXAttrs();
|
Map<String, byte[]> srcXAttrs = srcFileStatus.getXAttrs();
|
||||||
Map<String, byte[]> targetXAttrs = getXAttrs(targetFS, path);
|
Map<String, byte[]> targetXAttrs = getXAttrs(targetFS, path);
|
||||||
if (srcXAttrs != null && !srcXAttrs.equals(targetXAttrs)) {
|
if (srcXAttrs != null && !srcXAttrs.equals(targetXAttrs)) {
|
||||||
Iterator<Entry<String, byte[]>> iter = srcXAttrs.entrySet().iterator();
|
for (Entry<String, byte[]> entry : srcXAttrs.entrySet()) {
|
||||||
while (iter.hasNext()) {
|
String xattrName = entry.getKey();
|
||||||
Entry<String, byte[]> entry = iter.next();
|
|
||||||
final String xattrName = entry.getKey();
|
|
||||||
if (xattrName.startsWith(rawNS) || preserveXAttrs) {
|
if (xattrName.startsWith(rawNS) || preserveXAttrs) {
|
||||||
targetFS.setXAttr(path, entry.getKey(), entry.getValue());
|
targetFS.setXAttr(path, xattrName, entry.getValue());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (attributes.contains(FileAttribute.REPLICATION) && ! targetFileStatus.isDirectory() &&
|
if (attributes.contains(FileAttribute.REPLICATION) && !targetFileStatus.isDirectory() &&
|
||||||
srcFileStatus.getReplication() != targetFileStatus.getReplication()) {
|
(srcFileStatus.getReplication() != targetFileStatus.getReplication())) {
|
||||||
targetFS.setReplication(path, srcFileStatus.getReplication());
|
targetFS.setReplication(path, srcFileStatus.getReplication());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (attributes.contains(FileAttribute.GROUP) &&
|
if (attributes.contains(FileAttribute.GROUP) &&
|
||||||
!group.equals(srcFileStatus.getGroup())) {
|
!group.equals(srcFileStatus.getGroup())) {
|
||||||
group = srcFileStatus.getGroup();
|
group = srcFileStatus.getGroup();
|
||||||
chown = true;
|
chown = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (attributes.contains(FileAttribute.USER) &&
|
if (attributes.contains(FileAttribute.USER) &&
|
||||||
!user.equals(srcFileStatus.getOwner())) {
|
!user.equals(srcFileStatus.getOwner())) {
|
||||||
user = srcFileStatus.getOwner();
|
user = srcFileStatus.getOwner();
|
||||||
chown = true;
|
chown = true;
|
||||||
}
|
}
|
||||||
@ -256,6 +254,12 @@ public static void preserve(FileSystem targetFS, Path path,
|
|||||||
if (chown) {
|
if (chown) {
|
||||||
targetFS.setOwner(path, user, group);
|
targetFS.setOwner(path, user, group);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (attributes.contains(FileAttribute.TIMES)) {
|
||||||
|
targetFS.setTimes(path,
|
||||||
|
srcFileStatus.getModificationTime(),
|
||||||
|
srcFileStatus.getAccessTime());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -497,7 +497,7 @@ public void testPreserve() {
|
|||||||
attribIterator.next();
|
attribIterator.next();
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
Assert.assertEquals(i, 6);
|
Assert.assertEquals(i, DistCpOptionSwitch.PRESERVE_STATUS_DEFAULT.length() - 2);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
OptionsParser.parse(new String[] {
|
OptionsParser.parse(new String[] {
|
||||||
|
@ -590,6 +590,7 @@ public StubContext run() {
|
|||||||
EnumSet.allOf(DistCpOptions.FileAttribute.class);
|
EnumSet.allOf(DistCpOptions.FileAttribute.class);
|
||||||
preserveStatus.remove(DistCpOptions.FileAttribute.ACL);
|
preserveStatus.remove(DistCpOptions.FileAttribute.ACL);
|
||||||
preserveStatus.remove(DistCpOptions.FileAttribute.XATTR);
|
preserveStatus.remove(DistCpOptions.FileAttribute.XATTR);
|
||||||
|
preserveStatus.remove(DistCpOptions.FileAttribute.TIMES);
|
||||||
|
|
||||||
context.getConfiguration().set(DistCpConstants.CONF_LABEL_PRESERVE_STATUS,
|
context.getConfiguration().set(DistCpConstants.CONF_LABEL_PRESERVE_STATUS,
|
||||||
DistCpUtils.packAttributes(preserveStatus));
|
DistCpUtils.packAttributes(preserveStatus));
|
||||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user