diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 73eadef237..c8effc5413 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -265,6 +265,9 @@ Trunk (Unreleased)
HADOOP-11828. Implement the Hitchhiker erasure coding algorithm.
(Jack Liuquan via zhz)
+ HADOOP-12292. Make use of DeleteObjects optional.
+ (Thomas Demoor via stevel)
+
BUG FIXES
HADOOP-12617. SPNEGO authentication request to non-default realm gets
diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
index ceae3ededf..34e1236173 100644
--- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
+++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
@@ -887,6 +887,15 @@ for ldap providers in the same way as above does.
  <description>Threshold before uploads or copies use parallel multipart operations.</description>
</property>
+<property>
+  <name>fs.s3a.multiobjectdelete.enable</name>
+  <value>true</value>
+  <description>When enabled, multiple single-object delete requests are replaced by
+    a single 'delete multiple objects' request, reducing the number of requests.
+    Beware: legacy S3-compatible object stores might not support this request.
+  </description>
+</property>
+
<property>
  <name>fs.s3a.acl.default</name>
  <description>Set a canned ACL for newly created and copied objects. Value may be private,
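For illustration, a minimal sketch of a client opting out of the batched delete when targeting a legacy S3-compatible store; the class and bucket names are hypothetical, only the configuration key comes from this patch:

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class DisableMultiObjectDelete {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Fall back to one DELETE request per object; some legacy
        // S3-compatible stores reject the multi-object delete call.
        conf.setBoolean("fs.s3a.multiobjectdelete.enable", false);
        FileSystem fs = FileSystem.get(URI.create("s3a://example-bucket/"), conf);
        fs.delete(new Path("/tmp/batch"), true);  // issues per-key deletes
      }
    }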
diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml
index 4e9b502aa7..607857343e 100644
--- a/hadoop-project/pom.xml
+++ b/hadoop-project/pom.xml
@@ -115,6 +115,7 @@
    <exec-maven-plugin.version>1.3.1</exec-maven-plugin.version>
    <make-maven-plugin.version>1.0-beta-1</make-maven-plugin.version>
    <native-maven-plugin.version>1.0-alpha-8</native-maven-plugin.version>
+    <surefire.fork.timeout>900</surefire.fork.timeout>
@@ -1129,7 +1130,7 @@
        <artifactId>maven-surefire-plugin</artifactId>
        <configuration>
          <reuseForks>false</reuseForks>
-          <forkedProcessTimeoutInSeconds>900</forkedProcessTimeoutInSeconds>
+          <forkedProcessTimeoutInSeconds>${surefire.fork.timeout}</forkedProcessTimeoutInSeconds>
          <argLine>${maven-surefire-plugin.argLine}</argLine>
          <environmentVariables>
            <HADOOP_COMMON_HOME>${hadoop.common.build.dir}</HADOOP_COMMON_HOME>
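Extracting the hard-coded 900-second surefire timeout into the `surefire.fork.timeout` property lets slow test environments raise it from the command line without editing the pom, e.g. `mvn test -Dsurefire.fork.timeout=1800` (1800 is an arbitrary example value).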
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
index fc99cb9607..f10f3db60d 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
@@ -79,6 +79,9 @@ public class Constants {
public static final String MIN_MULTIPART_THRESHOLD = "fs.s3a.multipart.threshold";
public static final long DEFAULT_MIN_MULTIPART_THRESHOLD = Integer.MAX_VALUE;
+  // Enable multi-object delete calls?
+ public static final String ENABLE_MULTI_DELETE = "fs.s3a.multiobjectdelete.enable";
+
// comma separated list of directories
public static final String BUFFER_DIR = "fs.s3a.buffer.dir";
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 71771bd9b9..5ea6bec80c 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -38,6 +38,7 @@
import com.amazonaws.auth.InstanceProfileCredentialsProvider;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.CannedAccessControlList;
+import com.amazonaws.services.s3.model.DeleteObjectRequest;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ObjectListing;
@@ -82,6 +83,7 @@ public class S3AFileSystem extends FileSystem {
private String bucket;
private int maxKeys;
private long partSize;
+ private boolean enableMultiObjectsDelete;
private TransferManager transfers;
private ExecutorService threadPoolExecutor;
private long multiPartThreshold;
@@ -200,6 +202,7 @@ public void initialize(URI name, Configuration conf) throws IOException {
partSize = conf.getLong(MULTIPART_SIZE, DEFAULT_MULTIPART_SIZE);
multiPartThreshold = conf.getLong(MIN_MULTIPART_THRESHOLD,
DEFAULT_MIN_MULTIPART_THRESHOLD);
+ enableMultiObjectsDelete = conf.getBoolean(ENABLE_MULTI_DELETE, true);
if (partSize < 5 * 1024 * 1024) {
LOG.error(MULTIPART_SIZE + " must be at least 5 MB");
@@ -522,11 +525,7 @@ public boolean rename(Path src, Path dst) throws IOException {
copyFile(summary.getKey(), newDstKey);
if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
- DeleteObjectsRequest deleteRequest =
- new DeleteObjectsRequest(bucket).withKeys(keysToDelete);
- s3.deleteObjects(deleteRequest);
- statistics.incrementWriteOps(1);
- keysToDelete.clear();
+ removeKeys(keysToDelete, true);
}
}
@@ -534,11 +533,8 @@ public boolean rename(Path src, Path dst) throws IOException {
objects = s3.listNextBatchOfObjects(objects);
statistics.incrementReadOps(1);
} else {
- if (keysToDelete.size() > 0) {
- DeleteObjectsRequest deleteRequest =
- new DeleteObjectsRequest(bucket).withKeys(keysToDelete);
- s3.deleteObjects(deleteRequest);
- statistics.incrementWriteOps(1);
+ if (!keysToDelete.isEmpty()) {
+ removeKeys(keysToDelete, false);
}
break;
}
@@ -552,6 +548,36 @@ public boolean rename(Path src, Path dst) throws IOException {
return true;
}
+  /**
+   * A helper method to delete a list of keys on an S3 backend.
+   *
+   * @param keysToDelete collection of keys to delete on the S3 backend
+   * @param clearKeys clears the keysToDelete list after processing it
+   *   when set to true
+   */
+  private void removeKeys(List<DeleteObjectsRequest.KeyVersion> keysToDelete,
+      boolean clearKeys) {
+ if (enableMultiObjectsDelete) {
+ DeleteObjectsRequest deleteRequest
+ = new DeleteObjectsRequest(bucket).withKeys(keysToDelete);
+ s3.deleteObjects(deleteRequest);
+ statistics.incrementWriteOps(1);
+ } else {
+ int writeops = 0;
+
+ for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) {
+ s3.deleteObject(
+ new DeleteObjectRequest(bucket, keyVersion.getKey()));
+ writeops++;
+ }
+
+ statistics.incrementWriteOps(writeops);
+ }
+ if (clearKeys) {
+ keysToDelete.clear();
+ }
+ }
+
/** Delete a file.
*
* @param f the path to delete.
@@ -626,11 +652,7 @@ public boolean delete(Path f, boolean recursive) throws IOException {
}
if (keys.size() == MAX_ENTRIES_TO_DELETE) {
- DeleteObjectsRequest deleteRequest =
- new DeleteObjectsRequest(bucket).withKeys(keys);
- s3.deleteObjects(deleteRequest);
- statistics.incrementWriteOps(1);
- keys.clear();
+ removeKeys(keys, true);
}
}
@@ -639,10 +661,7 @@ public boolean delete(Path f, boolean recursive) throws IOException {
statistics.incrementReadOps(1);
} else {
if (!keys.isEmpty()) {
- DeleteObjectsRequest deleteRequest =
- new DeleteObjectsRequest(bucket).withKeys(keys);
- s3.deleteObjects(deleteRequest);
- statistics.incrementWriteOps(1);
+ removeKeys(keys, false);
}
break;
}
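A hedged sketch of exercising both delete paths that the new removeKeys() helper switches between; the class name and paths are hypothetical, while ENABLE_MULTI_DELETE is the constant added in this patch:

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import static org.apache.hadoop.fs.s3a.Constants.ENABLE_MULTI_DELETE;

    public class MultiDeleteToggleCheck {
      public static void main(String[] args) throws Exception {
        for (boolean batched : new boolean[] {true, false}) {
          Configuration conf = new Configuration();
          // true  -> one DeleteObjectsRequest per batch of keys
          // false -> one DeleteObjectRequest per key
          conf.setBoolean(ENABLE_MULTI_DELETE, batched);
          // newInstance() bypasses the FileSystem cache so each pass
          // really picks up its own configuration.
          FileSystem fs = FileSystem.newInstance(URI.create(args[0]), conf);
          Path dir = new Path("/multi-delete-check");
          fs.mkdirs(dir);
          fs.delete(dir, true);
          fs.close();
        }
      }
    }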
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index 9fb0f362e4..73775c890c 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -266,6 +266,15 @@ If you do any of these: change your credentials immediately!
      <description>Threshold before uploads or copies use parallel multipart operations.</description>
    </property>
+    <property>
+      <name>fs.s3a.multiobjectdelete.enable</name>
+      <value>true</value>
+      <description>When enabled, multiple single-object delete requests are replaced by
+        a single 'delete multiple objects' request, reducing the number of requests.
+        Beware: legacy S3-compatible object stores might not support this request.
+      </description>
+    </property>
+
    <property>
      <name>fs.s3a.acl.default</name>
      <description>Set a canned ACL for newly created and copied objects. Value may be private,
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
index e0cbc92f5c..e44a90e902 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
@@ -36,13 +36,21 @@
/**
* Base class for scale tests; here is where the common scale configuration
- * keys are defined
+ * keys are defined.
*/
public class S3AScaleTestBase {
public static final String SCALE_TEST = "scale.test.";
+
+ /**
+ * The number of operations to perform: {@value}
+ */
public static final String KEY_OPERATION_COUNT =
SCALE_TEST + "operation.count";
+
+ /**
+ * The default number of operations to perform: {@value}
+ */
public static final long DEFAULT_OPERATION_COUNT = 2005;
protected S3AFileSystem fs;
@@ -71,6 +79,7 @@ public Configuration getConf() {
@Before
public void setUp() throws Exception {
conf = createConfiguration();
+ LOG.info("Scale test operation count = {}", getOperationCount());
fs = S3ATestUtils.createTestFileSystem(conf);
}
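A hypothetical subclass illustrating how the documented keys are consumed; only KEY_OPERATION_COUNT, DEFAULT_OPERATION_COUNT and getConf() come from the base class above:

    import org.junit.Test;

    public class TestS3AManyOperations extends S3AScaleTestBase {
      @Test
      public void testScaledOperations() throws Exception {
        // Overridable on the command line: -Dscale.test.operation.count=...
        long count = getConf().getLong(KEY_OPERATION_COUNT,
            DEFAULT_OPERATION_COUNT);
        for (long i = 0; i < count; i++) {
          // one scaled filesystem operation per iteration
        }
      }
    }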
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
index c913a67a4a..d521ba8ac9 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
@@ -61,7 +61,7 @@ public void testBulkRenameAndDelete() throws Throwable {
// use Executor to speed up file creation
ExecutorService exec = Executors.newFixedThreadPool(16);
      final ExecutorCompletionService<String> completionService =
-          new ExecutorCompletionService<String>(exec);
+          new ExecutorCompletionService<>(exec);
try {
final byte[] data = ContractTestUtils.dataset(testBufferSize, 'a', 'z');
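For reference, a self-contained sketch of the completion-service pattern this test relies on, with file creation replaced by a placeholder task:

    import java.util.concurrent.Callable;
    import java.util.concurrent.ExecutorCompletionService;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;

    public class CompletionServiceSketch {
      public static void main(String[] args) throws Exception {
        ExecutorService exec = Executors.newFixedThreadPool(16);
        ExecutorCompletionService<String> completionService =
            new ExecutorCompletionService<>(exec);
        int tasks = 100;
        for (int i = 0; i < tasks; i++) {
          final String name = "file-" + i;
          completionService.submit(new Callable<String>() {
            @Override
            public String call() {
              return name;  // placeholder for creating one test file
            }
          });
        }
        // Drain the queue so any task failure is rethrown here.
        for (int i = 0; i < tasks; i++) {
          completionService.take().get();
        }
        exec.shutdown();
      }
    }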