diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 73eadef237..c8effc5413 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -265,6 +265,9 @@ Trunk (Unreleased)
     HADOOP-11828. Implement the Hitchhiker erasure coding algorithm.
     (Jack Liuquan via zhz)
 
+    HADOOP-12292. Make use of DeleteObjects optional.
+    (Thomas Demoor via stevel)
+
   BUG FIXES
 
     HADOOP-12617. SPNEGO authentication request to non-default realm gets
diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
index ceae3ededf..34e1236173 100644
--- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
+++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
@@ -887,6 +887,15 @@ for ldap providers in the same way as above does.
   <description>Threshold before uploads or copies use parallel multipart operations.</description>
 </property>
 
+<property>
+  <name>fs.s3a.multiobjectdelete.enable</name>
+  <value>true</value>
+  <description>When enabled, multiple single-object delete requests are replaced by
+    a single 'delete multiple objects'-request, reducing the number of requests.
+    Beware: legacy S3-compatible object stores might not support this request.
+  </description>
+</property>
+
 <property>
   <name>fs.s3a.acl.default</name>
   <description>Set a canned ACL for newly created and copied objects. Value may be private,
diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml
index 4e9b502aa7..607857343e 100644
--- a/hadoop-project/pom.xml
+++ b/hadoop-project/pom.xml
@@ -115,6 +115,7 @@
     <exec-maven-plugin.version>1.3.1</exec-maven-plugin.version>
     <make-maven-plugin.version>1.0-beta-1</make-maven-plugin.version>
     <native-maven-plugin.version>1.0-alpha-8</native-maven-plugin.version>
+    <surefire.fork.timeout>900</surefire.fork.timeout>
   </properties>
 
   <dependencyManagement>
@@ -1129,7 +1130,7 @@
         <artifactId>maven-surefire-plugin</artifactId>
         <configuration>
           <reuseForks>false</reuseForks>
-          <forkedProcessTimeoutInSeconds>900</forkedProcessTimeoutInSeconds>
+          <forkedProcessTimeoutInSeconds>${surefire.fork.timeout}</forkedProcessTimeoutInSeconds>
           <argLine>${maven-surefire-plugin.argLine}</argLine>
           <environmentVariables>
             <HADOOP_COMMON_HOME>${hadoop.common.build.dir}</HADOOP_COMMON_HOME>
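Together with the new `fs.s3a.multiobjectdelete.enable` key (documented above, default `true`), the patch lets deployments opt out of the bulk-delete call entirely. A minimal client-side sketch, not part of the patch (the bucket URI and path are placeholders):

```java
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DisableBulkDelete {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Fall back to one DELETE request per object, for legacy
    // S3-compatible stores that reject multi-object delete.
    conf.setBoolean("fs.s3a.multiobjectdelete.enable", false);

    // "s3a://example-bucket/" is a placeholder for a real bucket.
    FileSystem fs = FileSystem.get(URI.create("s3a://example-bucket/"), conf);
    fs.delete(new Path("/tmp/scratch"), true);  // recursive delete
    fs.close();
  }
}
```

Since the default stays `true`, AWS-facing deployments keep the cheaper batched behaviour unless they explicitly opt out.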
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
index fc99cb9607..f10f3db60d 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
@@ -79,6 +79,9 @@ public class Constants {
   public static final String MIN_MULTIPART_THRESHOLD = "fs.s3a.multipart.threshold";
   public static final long DEFAULT_MIN_MULTIPART_THRESHOLD = Integer.MAX_VALUE;
 
+  // enable multiobject-delete calls?
+  public static final String ENABLE_MULTI_DELETE = "fs.s3a.multiobjectdelete.enable";
+
   // comma separated list of directories
   public static final String BUFFER_DIR = "fs.s3a.buffer.dir";
 
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 71771bd9b9..5ea6bec80c 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -38,6 +38,7 @@ import com.amazonaws.auth.InstanceProfileCredentialsProvider;
 import com.amazonaws.services.s3.AmazonS3Client;
 import com.amazonaws.services.s3.model.CannedAccessControlList;
+import com.amazonaws.services.s3.model.DeleteObjectRequest;
 import com.amazonaws.services.s3.model.DeleteObjectsRequest;
 import com.amazonaws.services.s3.model.ListObjectsRequest;
 import com.amazonaws.services.s3.model.ObjectListing;
@@ -82,6 +83,7 @@ public class S3AFileSystem extends FileSystem {
   private String bucket;
   private int maxKeys;
   private long partSize;
+  private boolean enableMultiObjectsDelete;
   private TransferManager transfers;
   private ExecutorService threadPoolExecutor;
   private long multiPartThreshold;
@@ -200,6 +202,7 @@ public void initialize(URI name, Configuration conf) throws IOException {
     partSize = conf.getLong(MULTIPART_SIZE, DEFAULT_MULTIPART_SIZE);
     multiPartThreshold = conf.getLong(MIN_MULTIPART_THRESHOLD,
       DEFAULT_MIN_MULTIPART_THRESHOLD);
+    enableMultiObjectsDelete = conf.getBoolean(ENABLE_MULTI_DELETE, true);
 
     if (partSize < 5 * 1024 * 1024) {
       LOG.error(MULTIPART_SIZE + " must be at least 5 MB");
@@ -522,11 +525,7 @@ public boolean rename(Path src, Path dst) throws IOException {
           copyFile(summary.getKey(), newDstKey);
 
           if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
-            DeleteObjectsRequest deleteRequest =
-                new DeleteObjectsRequest(bucket).withKeys(keysToDelete);
-            s3.deleteObjects(deleteRequest);
-            statistics.incrementWriteOps(1);
-            keysToDelete.clear();
+            removeKeys(keysToDelete, true);
           }
         }
 
@@ -534,11 +533,8 @@ public boolean rename(Path src, Path dst) throws IOException {
           objects = s3.listNextBatchOfObjects(objects);
           statistics.incrementReadOps(1);
         } else {
-          if (keysToDelete.size() > 0) {
-            DeleteObjectsRequest deleteRequest =
-                new DeleteObjectsRequest(bucket).withKeys(keysToDelete);
-            s3.deleteObjects(deleteRequest);
-            statistics.incrementWriteOps(1);
+          if (!keysToDelete.isEmpty()) {
+            removeKeys(keysToDelete, false);
           }
           break;
         }
@@ -552,6 +548,36 @@ public boolean rename(Path src, Path dst) throws IOException {
     return true;
   }
 
+  /**
+   * A helper method to delete a list of keys from the S3 backend.
+   *
+   * @param keysToDelete collection of keys to delete from the S3 backend
+   * @param clearKeys if true, clear keysToDelete after the delete completes
+   */
+  private void removeKeys(List<DeleteObjectsRequest.KeyVersion> keysToDelete,
+      boolean clearKeys) {
+    if (enableMultiObjectsDelete) {
+      // one bulk request deletes the whole batch
+      DeleteObjectsRequest deleteRequest
+          = new DeleteObjectsRequest(bucket).withKeys(keysToDelete);
+      s3.deleteObjects(deleteRequest);
+      statistics.incrementWriteOps(1);
+    } else {
+      // fallback for stores without multi-object delete: one request per key
+      int writeops = 0;
+
+      for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) {
+        s3.deleteObject(
+            new DeleteObjectRequest(bucket, keyVersion.getKey()));
+        writeops++;
+      }
+
+      statistics.incrementWriteOps(writeops);
+    }
+    if (clearKeys) {
+      keysToDelete.clear();
+    }
+  }
+
   /** Delete a file.
    *
    * @param f the path to delete.
@@ -626,11 +652,7 @@ public boolean delete(Path f, boolean recursive) throws IOException {
           }
 
           if (keys.size() == MAX_ENTRIES_TO_DELETE) {
-            DeleteObjectsRequest deleteRequest =
-                new DeleteObjectsRequest(bucket).withKeys(keys);
-            s3.deleteObjects(deleteRequest);
-            statistics.incrementWriteOps(1);
-            keys.clear();
+            removeKeys(keys, true);
           }
         }
 
@@ -639,10 +661,7 @@ public boolean delete(Path f, boolean recursive) throws IOException {
           objects = s3.listNextBatchOfObjects(objects);
           statistics.incrementReadOps(1);
         } else {
           if (!keys.isEmpty()) {
-            DeleteObjectsRequest deleteRequest =
-                new DeleteObjectsRequest(bucket).withKeys(keys);
-            s3.deleteObjects(deleteRequest);
-            statistics.incrementWriteOps(1);
+            removeKeys(keys, false);
           }
           break;
         }
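The new `removeKeys()` helper centralizes delete logic that was previously duplicated across `rename()` and `delete()`, and is the single place where the `fs.s3a.multiobjectdelete.enable` switch takes effect. Stripped of the S3A plumbing, the two strategies it toggles between reduce to the following AWS SDK calls; this is an illustrative sketch, not code from the patch (the class and method names here are invented):

```java
import java.util.List;

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.DeleteObjectRequest;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.DeleteObjectsRequest.KeyVersion;

/** Sketch of the two strategies removeKeys() switches between. */
final class DeleteStrategies {

  /** One bulk HTTP request deletes a whole batch of keys.
   *  S3 caps such a request at 1000 keys, which is why the callers
   *  flush once the batch reaches MAX_ENTRIES_TO_DELETE. */
  static void bulkDelete(AmazonS3 s3, String bucket, List<KeyVersion> keys) {
    s3.deleteObjects(new DeleteObjectsRequest(bucket).withKeys(keys));
  }

  /** Fallback path: one HTTP request per key. Slower and costs one
   *  write op per object, but works on legacy S3 clones. */
  static void singleDeletes(AmazonS3 s3, String bucket, List<KeyVersion> keys) {
    for (KeyVersion kv : keys) {
      s3.deleteObject(new DeleteObjectRequest(bucket, kv.getKey()));
    }
  }
}
```

The write-op accounting in the patch mirrors this asymmetry: one increment for a bulk request, one per key on the fallback path.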
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index 9fb0f362e4..73775c890c 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -266,6 +266,15 @@ If you do any of these: change your credentials immediately!
       Threshold before uploads or copies use parallel multipart operations.
     </property>
 
+    <property>
+      <name>fs.s3a.multiobjectdelete.enable</name>
+      <value>true</value>
+      <description>When enabled, multiple single-object delete requests are replaced by
+        a single 'delete multiple objects'-request, reducing the number of requests.
+        Beware: legacy S3-compatible object stores might not support this request.
+      </description>
+    </property>
+
     <property>
       <name>fs.s3a.acl.default</name>
       <description>Set a canned ACL for newly created and copied objects. Value may be private,
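The scale tests that exercise these delete paths size their workload from `scale.test.operation.count`, defined just below. A run against a slow or rate-limited store could shrink the workload by overriding the base class's configuration hook; the subclass here is hypothetical, and it assumes the `createConfiguration()` method (called from `setUp()` in this patch) is overridable:

```java
package org.apache.hadoop.fs.s3a.scale;

import org.apache.hadoop.conf.Configuration;

/** Hypothetical smoke-test variant with a reduced operation count. */
public class TestS3ASmallScale extends S3AScaleTestBase {

  @Override
  protected Configuration createConfiguration() {
    Configuration conf = super.createConfiguration();
    // 100 operations instead of DEFAULT_OPERATION_COUNT (2005).
    conf.setLong(KEY_OPERATION_COUNT, 100);
    return conf;
  }
}
```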
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
index e0cbc92f5c..e44a90e902 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
@@ -36,13 +36,21 @@
 
 /**
  * Base class for scale tests; here is where the common scale configuration
- * keys are defined
+ * keys are defined.
  */
 public class S3AScaleTestBase {
 
   public static final String SCALE_TEST = "scale.test.";
+
+  /**
+   * The number of operations to perform: {@value}
+   */
   public static final String KEY_OPERATION_COUNT =
       SCALE_TEST + "operation.count";
+
+  /**
+   * The default number of operations to perform: {@value}
+   */
   public static final long DEFAULT_OPERATION_COUNT = 2005;
 
   protected S3AFileSystem fs;
@@ -71,6 +79,7 @@ public Configuration getConf() {
   @Before
   public void setUp() throws Exception {
     conf = createConfiguration();
+    LOG.info("Scale test operation count = {}", getOperationCount());
     fs = S3ATestUtils.createTestFileSystem(conf);
   }
 
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
index c913a67a4a..d521ba8ac9 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
@@ -61,7 +61,7 @@ public void testBulkRenameAndDelete() throws Throwable {
     // use Executor to speed up file creation
     ExecutorService exec = Executors.newFixedThreadPool(16);
     final ExecutorCompletionService<String> completionService =
-        new ExecutorCompletionService(exec);
+        new ExecutorCompletionService<>(exec);
     try {
       final byte[] data = ContractTestUtils.dataset(testBufferSize, 'a', 'z');
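The final hunk is a cleanup rather than a behaviour change: constructing the completion service with the diamond operator removes a raw-type (unchecked) warning. In isolation, the typed pattern the test relies on looks like this minimal, self-contained sketch (not the test's actual body):

```java
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

final class CompletionServiceSketch {
  public static void main(String[] args) throws Exception {
    ExecutorService exec = Executors.newFixedThreadPool(16);
    // Typed construction: <> infers <String>, so no raw-type warning.
    ExecutorCompletionService<String> completionService =
        new ExecutorCompletionService<>(exec);
    try {
      for (int i = 0; i < 4; i++) {
        final int id = i;
        completionService.submit(() -> "task-" + id);  // Callable<String>
      }
      for (int i = 0; i < 4; i++) {
        // take() yields futures in completion order, not submission order.
        System.out.println(completionService.take().get());
      }
    } finally {
      exec.shutdown();
    }
  }
}
```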