HADOOP-11687. Ignore x-* and response headers when copying an Amazon S3 object. Contributed by Aaron Peterson and harsh.

This commit is contained in:
Harsh J 2016-04-01 14:18:10 +05:30
parent 3488c4f2c9
commit 256c82fe29
2 changed files with 76 additions and 1 deletions

View File

@ -26,6 +26,7 @@
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
@ -1128,7 +1129,7 @@ private void copyFile(String srcKey, String dstKey) throws IOException {
} }
ObjectMetadata srcom = s3.getObjectMetadata(bucket, srcKey); ObjectMetadata srcom = s3.getObjectMetadata(bucket, srcKey);
final ObjectMetadata dstom = srcom.clone(); ObjectMetadata dstom = cloneObjectMetadata(srcom);
if (StringUtils.isNotBlank(serverSideEncryptionAlgorithm)) { if (StringUtils.isNotBlank(serverSideEncryptionAlgorithm)) {
dstom.setSSEAlgorithm(serverSideEncryptionAlgorithm); dstom.setSSEAlgorithm(serverSideEncryptionAlgorithm);
} }
@ -1234,6 +1235,73 @@ public int read() throws IOException {
statistics.incrementWriteOps(1); statistics.incrementWriteOps(1);
} }
/**
 * Builds a fresh {@link ObjectMetadata} populated from the given one.
 * The copy is assembled attribute by attribute rather than through
 * {@link ObjectMetadata#clone()}, so that only the attributes listed
 * in this method are carried over to the result.
 * @param source the {@link ObjectMetadata} whose attributes are read
 * @return a new {@link ObjectMetadata} holding the selected attributes
 */
private ObjectMetadata cloneObjectMetadata(ObjectMetadata source) {
  // NOTE: the attribute list below is maintained by hand. Any attribute
  // added to ObjectMetadata in future is NOT copied unless a matching
  // setter call is added here.
  final ObjectMetadata copy = new ObjectMetadata();

  // Content length is always present, so it is copied unconditionally.
  copy.setContentLength(source.getContentLength());

  // Every attribute below may be null on the source. A null is never
  // forwarded to the setter, because letting nulls through breaks
  // later use of the metadata.
  final String cacheControl = source.getCacheControl();
  if (cacheControl != null) {
    copy.setCacheControl(cacheControl);
  }

  final String contentDisposition = source.getContentDisposition();
  if (contentDisposition != null) {
    copy.setContentDisposition(contentDisposition);
  }

  final String contentEncoding = source.getContentEncoding();
  if (contentEncoding != null) {
    copy.setContentEncoding(contentEncoding);
  }

  final String contentMd5 = source.getContentMD5();
  if (contentMd5 != null) {
    copy.setContentMD5(contentMd5);
  }

  final String contentType = source.getContentType();
  if (contentType != null) {
    copy.setContentType(contentType);
  }

  final Date expirationTime = source.getExpirationTime();
  if (expirationTime != null) {
    copy.setExpirationTime(expirationTime);
  }

  final String expirationRuleId = source.getExpirationTimeRuleId();
  if (expirationRuleId != null) {
    copy.setExpirationTimeRuleId(expirationRuleId);
  }

  final Date httpExpires = source.getHttpExpiresDate();
  if (httpExpires != null) {
    copy.setHttpExpiresDate(httpExpires);
  }

  final Date lastModified = source.getLastModified();
  if (lastModified != null) {
    copy.setLastModified(lastModified);
  }

  final Boolean ongoingRestore = source.getOngoingRestore();
  if (ongoingRestore != null) {
    copy.setOngoingRestore(ongoingRestore);
  }

  final Date restoreExpiration = source.getRestoreExpirationTime();
  if (restoreExpiration != null) {
    copy.setRestoreExpirationTime(restoreExpiration);
  }

  final String sseAlgorithm = source.getSSEAlgorithm();
  if (sseAlgorithm != null) {
    copy.setSSEAlgorithm(sseAlgorithm);
  }

  final String sseCustomerAlgorithm = source.getSSECustomerAlgorithm();
  if (sseCustomerAlgorithm != null) {
    copy.setSSECustomerAlgorithm(sseCustomerAlgorithm);
  }

  final String sseCustomerKeyMd5 = source.getSSECustomerKeyMd5();
  if (sseCustomerKeyMd5 != null) {
    copy.setSSECustomerKeyMd5(sseCustomerKeyMd5);
  }

  // User-defined metadata is transferred one entry at a time.
  final Map<String, String> userMetadata = source.getUserMetadata();
  for (Map.Entry<String, String> entry : userMetadata.entrySet()) {
    copy.addUserMetadata(entry.getKey(), entry.getValue());
  }

  return copy;
}
/** /**
* Return the number of bytes that large input files should be optimally * Return the number of bytes that large input files should be optimally
* be split into to minimize i/o time. * be split into to minimize i/o time.

View File

@ -417,6 +417,13 @@ which pass in authentication details to the test runner
These are both Hadoop XML configuration files, which must be placed into These are both Hadoop XML configuration files, which must be placed into
`hadoop-tools/hadoop-aws/src/test/resources`. `hadoop-tools/hadoop-aws/src/test/resources`.
### `core-site.xml`
This file already exists in the source tree and pulls in the configuration
defined in `auth-keys.xml`.
For most purposes you will not need to edit this file unless you
need to apply a specific, non-default property change during the tests.
### `auth-keys.xml` ### `auth-keys.xml`