diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md
index 5f24e75569..7dd3170036 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md
@@ -200,8 +200,8 @@ Prioritize file creation performance over safety checks for filesystem consisten
This:
1. Skips the `LIST` call which makes sure a file is being created over a directory.
Risk: a file is created over a directory.
-1. Ignores the overwrite flag.
-1. Never issues a `DELETE` call to delete parent directory markers.
+2. Ignores the overwrite flag.
+3. Never issues a `DELETE` call to delete parent directory markers.
It is possible to probe an S3A Filesystem instance for this capability through
the `hasPathCapability(path, "fs.s3a.create.performance")` check.
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextCreateMkdirBaseTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextCreateMkdirBaseTest.java
index fbd598c9de..fcb1b6925a 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextCreateMkdirBaseTest.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextCreateMkdirBaseTest.java
@@ -27,6 +27,7 @@
import static org.apache.hadoop.fs.FileContextTestHelper.*;
import static org.apache.hadoop.fs.contract.ContractTestUtils.assertIsDirectory;
import static org.apache.hadoop.fs.contract.ContractTestUtils.assertIsFile;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
import org.apache.hadoop.test.GenericTestUtils;
import org.slf4j.event.Level;
@@ -55,7 +56,10 @@ public abstract class FileContextCreateMkdirBaseTest {
protected final FileContextTestHelper fileContextTestHelper;
protected static FileContext fc;
-
+
+ public static final String MKDIR_FILE_PRESENT_ERROR =
+ " should have failed as a file was present";
+
static {
GenericTestUtils.setLogLevel(FileSystem.LOG, Level.DEBUG);
}
@@ -128,7 +132,7 @@ public void testMkdirsRecursiveWithExistingDir() throws IOException {
}
@Test
- public void testMkdirRecursiveWithExistingFile() throws IOException {
+ public void testMkdirRecursiveWithExistingFile() throws Exception {
Path f = getTestRootPath(fc, "NonExistant3/aDir");
fc.mkdir(f, FileContext.DEFAULT_PERM, true);
assertIsDirectory(fc.getFileStatus(f));
@@ -141,13 +145,12 @@ public void testMkdirRecursiveWithExistingFile() throws IOException {
// try creating another folder which conflicts with filePath
Path dirPath = new Path(filePath, "bDir/cDir");
- try {
- fc.mkdir(dirPath, FileContext.DEFAULT_PERM, true);
- Assert.fail("Mkdir for " + dirPath
- + " should have failed as a file was present");
- } catch(IOException e) {
- // failed as expected
- }
+ intercept(
+ IOException.class,
+ null,
+ "Mkdir for " + dirPath + MKDIR_FILE_PRESENT_ERROR,
+ () -> fc.mkdir(dirPath, FileContext.DEFAULT_PERM, true)
+ );
}
@Test
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractMkdirTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractMkdirTest.java
index de44bc232e..65ca0ee218 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractMkdirTest.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractMkdirTest.java
@@ -35,6 +35,9 @@
*/
public abstract class AbstractContractMkdirTest extends AbstractFSContractTestBase {
+ public static final String MKDIRS_NOT_FAILED_OVER_FILE =
+ "mkdirs did not fail over a file but returned ";
+
@Test
public void testMkDirRmDir() throws Throwable {
FileSystem fs = getFileSystem();
@@ -66,7 +69,7 @@ public void testNoMkdirOverFile() throws Throwable {
createFile(getFileSystem(), path, false, dataset);
try {
boolean made = fs.mkdirs(path);
- fail("mkdirs did not fail over a file but returned " + made
+ fail(MKDIRS_NOT_FAILED_OVER_FILE + made
+ "; " + ls(path));
} catch (ParentNotDirectoryException | FileAlreadyExistsException e) {
//parent is a directory
@@ -93,7 +96,7 @@ public void testMkdirOverParentFile() throws Throwable {
Path child = new Path(path,"child-to-mkdir");
try {
boolean made = fs.mkdirs(child);
- fail("mkdirs did not fail over a file but returned " + made
+ fail(MKDIRS_NOT_FAILED_OVER_FILE + made
+ "; " + ls(path));
} catch (ParentNotDirectoryException | FileAlreadyExistsException e) {
//parent is a directory
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index f820769609..25b036b5fc 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -3828,7 +3828,8 @@ public boolean mkdirs(Path p, FsPermission permission) throws IOException,
createStoreContext(),
path,
createMkdirOperationCallbacks(),
- isMagicCommitPath(path)));
+ isMagicCommitPath(path),
+ performanceFlags.enabled(PerformanceFlagEnum.Mkdir)));
}
/**
@@ -4281,7 +4282,9 @@ public boolean createEmptyDir(Path path, StoreContext storeContext)
new MkdirOperation(
storeContext,
path,
- createMkdirOperationCallbacks(), false));
+ createMkdirOperationCallbacks(),
+ false,
+ performanceFlags.enabled(PerformanceFlagEnum.Mkdir)));
}
}
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MkdirOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MkdirOperation.java
index 98a91b1881..a027cabffd 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MkdirOperation.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MkdirOperation.java
@@ -26,6 +26,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
@@ -54,30 +56,54 @@
*
If needed, one PUT
*
*/
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
public class MkdirOperation extends ExecutingStoreOperation {
private static final Logger LOG = LoggerFactory.getLogger(
MkdirOperation.class);
+ /**
+ * Path of the directory to be created.
+ */
private final Path dir;
+ /**
+ * Mkdir Callbacks object to be used by the Mkdir operation.
+ */
private final MkdirCallbacks callbacks;
/**
- * Should checks for ancestors existing be skipped?
- * This flag is set when working with magic directories.
+ * Whether to skip the validation of the parent directory.
+ */
+ private final boolean performanceMkdir;
+
+ /**
+ * Whether the path is magic commit path.
*/
private final boolean isMagicPath;
+ /**
+ * Initialize Mkdir Operation context for S3A.
+ *
+ * @param storeContext Store context.
+ * @param dir Dir path of the directory.
+ * @param callbacks MkdirCallbacks object used by the Mkdir operation.
+ * @param isMagicPath True if the path is magic commit path.
+ * @param performanceMkdir If true, skip validation of the parent directory
+ * structure.
+ */
public MkdirOperation(
final StoreContext storeContext,
final Path dir,
final MkdirCallbacks callbacks,
- final boolean isMagicPath) {
+ final boolean isMagicPath,
+ final boolean performanceMkdir) {
super(storeContext);
this.dir = dir;
this.callbacks = callbacks;
this.isMagicPath = isMagicPath;
+ this.performanceMkdir = performanceMkdir;
}
/**
@@ -124,7 +150,32 @@ public Boolean execute() throws IOException {
return true;
}
- // Walk path to root, ensuring closest ancestor is a directory, not file
+ // if performance creation mode is set, no need to check
+ // whether the closest ancestor is dir.
+ if (!performanceMkdir) {
+ verifyFileStatusOfClosestAncestor();
+ }
+
+ // if we get here there is no directory at the destination.
+ // so create one.
+
+ // Create the marker file, delete the parent entries
+ // if the filesystem isn't configured to retain them
+ callbacks.createFakeDirectory(dir, false);
+ return true;
+ }
+
+ /**
+ * Verify the file status of the closest ancestor, if it is
+ * dir, the mkdir operation should proceed. If it is file,
+ * the mkdir operation should throw error.
+ *
+ * @throws IOException If either file status could not be retrieved,
+ * or if the closest ancestor is a file.
+ */
+ private void verifyFileStatusOfClosestAncestor() throws IOException {
+ FileStatus fileStatus;
+ // Walk path to root, ensuring the closest ancestor is a directory, not file
Path fPart = dir.getParent();
try {
while (fPart != null && !fPart.isRoot()) {
@@ -140,24 +191,18 @@ public Boolean execute() throws IOException {
}
// there's a file at the parent entry
- throw new FileAlreadyExistsException(String.format(
- "Can't make directory for path '%s' since it is a file.",
- fPart));
+ throw new FileAlreadyExistsException(
+ String.format(
+ "Can't make directory for path '%s' since it is a file.",
+ fPart));
}
} catch (AccessDeniedException e) {
LOG.info("mkdirs({}}: Access denied when looking"
+ " for parent directory {}; skipping checks",
- dir, fPart);
+ dir,
+ fPart);
LOG.debug("{}", e, e);
}
-
- // if we get here there is no directory at the destination.
- // so create one.
-
- // Create the marker file, delete the parent entries
- // if the filesystem isn't configured to retain them
- callbacks.createFakeDirectory(dir, false);
- return true;
}
/**
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md
index 876072e81e..b8cb3ff732 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md
@@ -299,8 +299,11 @@ understands the risks.
| *Option* | *Meaning* | Since |
|----------|--------------------|:------|
| `create` | Create Performance | 3.4.1 |
+| `mkdir` | Mkdir Performance | 3.4.1 |
-The `create` flag has the same semantics as [`fs.s3a.create.performance`](#create-performance)
+
+* The `create` flag has the same semantics as [`fs.s3a.create.performance`](#create-performance)
+* The `mkdir` flag semantics are explained in [Mkdir Performance](#mkdir-performance)
### Create Performance `fs.s3a.create.performance`
@@ -321,6 +324,22 @@ It may however result in
Use with care, and, ideally, enable versioning on the S3 store.
+
+### Mkdir Performance
+
+`fs.s3a.performance.flag` flag option `mkdir`:
+
+* Mkdir does not check whether the parent is directory or file.
+
+This avoids the verification of the file status of the parent file
+or the closest ancestor. Unlike the default mkdir operation, if the
+parent is not a directory, the mkdir operation does not throw any
+error.
+
+This option can help with mkdir performance improvement but must be used
+only if the person setting them understands the above-mentioned risk.
+
+
### Thread and connection pool settings.
Each S3A client interacting with a single bucket, as a single user, has its
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdir.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdir.java
index d953e7eb6a..bace0a79f2 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdir.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdir.java
@@ -22,11 +22,22 @@
import org.apache.hadoop.fs.contract.AbstractContractMkdirTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
+import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
+
/**
* Test dir operations on S3A.
*/
public class ITestS3AContractMkdir extends AbstractContractMkdirTest {
+ @Override
+ protected Configuration createConfiguration() {
+ Configuration conf = super.createConfiguration();
+ removeBaseAndBucketOverrides(conf,
+ FS_S3A_CREATE_PERFORMANCE);
+ return conf;
+ }
+
@Override
protected AbstractFSContract createContract(Configuration conf) {
return new S3AContract(conf);
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java
new file mode 100644
index 0000000000..cacd6945d2
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.contract.s3a;
+
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.AbstractContractMkdirTest;
+import org.apache.hadoop.fs.contract.AbstractFSContract;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile;
+import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
+import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE;
+import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
+
+/**
+ * Test mkdir operations on S3A with create performance mode.
+ */
+public class ITestS3AContractMkdirWithCreatePerf extends AbstractContractMkdirTest {
+
+ @Override
+ protected Configuration createConfiguration() {
+ Configuration conf = super.createConfiguration();
+ removeBaseAndBucketOverrides(
+ conf,
+ FS_S3A_CREATE_PERFORMANCE,
+ FS_S3A_PERFORMANCE_FLAGS);
+ conf.setStrings(FS_S3A_PERFORMANCE_FLAGS,
+ "create,mkdir");
+ return conf;
+ }
+
+ @Override
+ protected AbstractFSContract createContract(Configuration conf) {
+ return new S3AContract(conf);
+ }
+
+ @Test
+ public void testMkdirOverParentFile() throws Throwable {
+ describe("try to mkdir where a parent is a file, should pass");
+ FileSystem fs = getFileSystem();
+ Path path = methodPath();
+ byte[] dataset = dataset(1024, ' ', 'z');
+ createFile(getFileSystem(), path, false, dataset);
+ Path child = new Path(path, "child-to-mkdir");
+ boolean childCreated = fs.mkdirs(child);
+ assertTrue("Child dir is created", childCreated);
+ assertIsFile(path);
+ byte[] bytes = ContractTestUtils.readDataset(getFileSystem(), path, dataset.length);
+ ContractTestUtils.compareByteArrays(dataset, bytes, dataset.length);
+ assertPathExists("mkdir failed", child);
+ assertDeleted(child, true);
+ }
+
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdir.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdir.java
index dcc9da9336..e71ca2ae52 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdir.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdir.java
@@ -13,12 +13,14 @@
*/
package org.apache.hadoop.fs.s3a.fileContext;
-import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContextCreateMkdirBaseTest;
import org.apache.hadoop.fs.s3a.S3ATestUtils;
import org.junit.Before;
+import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
+
/**
* Extends FileContextCreateMkdirBaseTest for a S3a FileContext.
*/
@@ -26,8 +28,11 @@ public class ITestS3AFileContextCreateMkdir
extends FileContextCreateMkdirBaseTest {
@Before
- public void setUp() throws IOException, Exception {
+ public void setUp() throws Exception {
Configuration conf = new Configuration();
+ removeBaseAndBucketOverrides(
+ conf,
+ FS_S3A_CREATE_PERFORMANCE);
fc = S3ATestUtils.createTestFileContext(conf);
super.setUp();
}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java
new file mode 100644
index 0000000000..64039e4c52
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java
@@ -0,0 +1,67 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs.s3a.fileContext;
+
+import org.junit.Before;
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileContextCreateMkdirBaseTest;
+import org.apache.hadoop.fs.s3a.S3ATestUtils;
+
+import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE;
+import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+
+/**
+ * Extends FileContextCreateMkdirBaseTest for a S3a FileContext with
+ * create performance mode.
+ */
+public class ITestS3AFileContextCreateMkdirCreatePerf
+ extends FileContextCreateMkdirBaseTest {
+
+ @Before
+ public void setUp() throws Exception {
+ Configuration conf = new Configuration();
+ removeBaseAndBucketOverrides(
+ conf,
+ FS_S3A_CREATE_PERFORMANCE,
+ FS_S3A_PERFORMANCE_FLAGS);
+ conf.setStrings(FS_S3A_PERFORMANCE_FLAGS,
+ "mkdir");
+ fc = S3ATestUtils.createTestFileContext(conf);
+ super.setUp();
+ }
+
+ @Override
+ public void tearDown() throws Exception {
+ if (fc != null) {
+ super.tearDown();
+ }
+ }
+
+ @Test
+ public void testMkdirRecursiveWithExistingFile() throws Exception {
+ intercept(
+ AssertionError.class,
+ MKDIR_FILE_PRESENT_ERROR,
+ "Dir creation should not have failed. "
+ + "Creation performance mode is expected "
+ + "to create dir without checking file "
+ + "status of parent dir.",
+ super::testMkdirRecursiveWithExistingFile);
+ }
+
+}