HADOOP-19072. S3A: expand optimisations on stores with "fs.s3a.performance.flags" for mkdir (#6543)

If the flag list in fs.s3a.performance.flags includes "mkdir"
then the safety check of a walk up the tree to look for a parent directory,
-done to verify a directory isn't being created under a file- are skipped.

This saves the cost of multiple list operations.

Contributed by Viraj Jasani
This commit is contained in:
Viraj Jasani 2024-08-08 09:48:51 -07:00 committed by GitHub
parent 2a50911734
commit 321a6cc55e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 265 additions and 34 deletions

View File

@ -200,8 +200,8 @@ Prioritize file creation performance over safety checks for filesystem consisten
This:
1. Skips the `LIST` call which makes sure a file is being created over a directory.
Risk: a file is created over a directory.
1. Ignores the overwrite flag.
1. Never issues a `DELETE` call to delete parent directory markers.
2. Ignores the overwrite flag.
3. Never issues a `DELETE` call to delete parent directory markers.
It is possible to probe an S3A Filesystem instance for this capability through
the `hasPathCapability(path, "fs.s3a.create.performance")` check.

View File

@ -27,6 +27,7 @@ import org.junit.Test;
import static org.apache.hadoop.fs.FileContextTestHelper.*;
import static org.apache.hadoop.fs.contract.ContractTestUtils.assertIsDirectory;
import static org.apache.hadoop.fs.contract.ContractTestUtils.assertIsFile;
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
import org.apache.hadoop.test.GenericTestUtils;
import org.slf4j.event.Level;
@ -55,7 +56,10 @@ public abstract class FileContextCreateMkdirBaseTest {
protected final FileContextTestHelper fileContextTestHelper;
protected static FileContext fc;
public static final String MKDIR_FILE_PRESENT_ERROR =
" should have failed as a file was present";
static {
GenericTestUtils.setLogLevel(FileSystem.LOG, Level.DEBUG);
}
@ -128,7 +132,7 @@ public abstract class FileContextCreateMkdirBaseTest {
}
@Test
public void testMkdirRecursiveWithExistingFile() throws IOException {
public void testMkdirRecursiveWithExistingFile() throws Exception {
Path f = getTestRootPath(fc, "NonExistant3/aDir");
fc.mkdir(f, FileContext.DEFAULT_PERM, true);
assertIsDirectory(fc.getFileStatus(f));
@ -141,13 +145,12 @@ public abstract class FileContextCreateMkdirBaseTest {
// try creating another folder which conflicts with filePath
Path dirPath = new Path(filePath, "bDir/cDir");
try {
fc.mkdir(dirPath, FileContext.DEFAULT_PERM, true);
Assert.fail("Mkdir for " + dirPath
+ " should have failed as a file was present");
} catch(IOException e) {
// failed as expected
}
intercept(
IOException.class,
null,
"Mkdir for " + dirPath + MKDIR_FILE_PRESENT_ERROR,
() -> fc.mkdir(dirPath, FileContext.DEFAULT_PERM, true)
);
}
@Test

View File

@ -35,6 +35,9 @@ import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
*/
public abstract class AbstractContractMkdirTest extends AbstractFSContractTestBase {
public static final String MKDIRS_NOT_FAILED_OVER_FILE =
"mkdirs did not fail over a file but returned ";
@Test
public void testMkDirRmDir() throws Throwable {
FileSystem fs = getFileSystem();
@ -66,7 +69,7 @@ public abstract class AbstractContractMkdirTest extends AbstractFSContractTestBa
createFile(getFileSystem(), path, false, dataset);
try {
boolean made = fs.mkdirs(path);
fail("mkdirs did not fail over a file but returned " + made
fail(MKDIRS_NOT_FAILED_OVER_FILE + made
+ "; " + ls(path));
} catch (ParentNotDirectoryException | FileAlreadyExistsException e) {
//parent is a directory
@ -93,7 +96,7 @@ public abstract class AbstractContractMkdirTest extends AbstractFSContractTestBa
Path child = new Path(path,"child-to-mkdir");
try {
boolean made = fs.mkdirs(child);
fail("mkdirs did not fail over a file but returned " + made
fail(MKDIRS_NOT_FAILED_OVER_FILE + made
+ "; " + ls(path));
} catch (ParentNotDirectoryException | FileAlreadyExistsException e) {
//parent is a directory

View File

@ -3828,7 +3828,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
createStoreContext(),
path,
createMkdirOperationCallbacks(),
isMagicCommitPath(path)));
isMagicCommitPath(path),
performanceFlags.enabled(PerformanceFlagEnum.Mkdir)));
}
/**
@ -4281,7 +4282,9 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
new MkdirOperation(
storeContext,
path,
createMkdirOperationCallbacks(), false));
createMkdirOperationCallbacks(),
false,
performanceFlags.enabled(PerformanceFlagEnum.Mkdir)));
}
}

View File

@ -26,6 +26,8 @@ import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
@ -54,30 +56,54 @@ import org.apache.hadoop.fs.s3a.S3AFileStatus;
* <li>If needed, one PUT</li>
* </ol>
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class MkdirOperation extends ExecutingStoreOperation<Boolean> {
private static final Logger LOG = LoggerFactory.getLogger(
MkdirOperation.class);
/**
* Path of the directory to be created.
*/
private final Path dir;
/**
* Mkdir Callbacks object to be used by the Mkdir operation.
*/
private final MkdirCallbacks callbacks;
/**
* Should checks for ancestors existing be skipped?
* This flag is set when working with magic directories.
* Whether to skip the validation of the parent directory.
*/
private final boolean performanceMkdir;
/**
* Whether the path is magic commit path.
*/
private final boolean isMagicPath;
/**
* Initialize Mkdir Operation context for S3A.
*
* @param storeContext Store context.
* @param dir Dir path of the directory.
* @param callbacks MkdirCallbacks object used by the Mkdir operation.
* @param isMagicPath True if the path is magic commit path.
* @param performanceMkdir If true, skip validation of the parent directory
* structure.
*/
public MkdirOperation(
final StoreContext storeContext,
final Path dir,
final MkdirCallbacks callbacks,
final boolean isMagicPath) {
final boolean isMagicPath,
final boolean performanceMkdir) {
super(storeContext);
this.dir = dir;
this.callbacks = callbacks;
this.isMagicPath = isMagicPath;
this.performanceMkdir = performanceMkdir;
}
/**
@ -124,7 +150,32 @@ public class MkdirOperation extends ExecutingStoreOperation<Boolean> {
return true;
}
// Walk path to root, ensuring closest ancestor is a directory, not file
// if performance creation mode is set, no need to check
// whether the closest ancestor is dir.
if (!performanceMkdir) {
verifyFileStatusOfClosestAncestor();
}
// if we get here there is no directory at the destination.
// so create one.
// Create the marker file, delete the parent entries
// if the filesystem isn't configured to retain them
callbacks.createFakeDirectory(dir, false);
return true;
}
/**
* Verify the file status of the closest ancestor, if it is
* dir, the mkdir operation should proceed. If it is file,
* the mkdir operation should throw error.
*
* @throws IOException If either file status could not be retrieved,
* or if the closest ancestor is a file.
*/
private void verifyFileStatusOfClosestAncestor() throws IOException {
FileStatus fileStatus;
// Walk path to root, ensuring the closest ancestor is a directory, not file
Path fPart = dir.getParent();
try {
while (fPart != null && !fPart.isRoot()) {
@ -140,24 +191,18 @@ public class MkdirOperation extends ExecutingStoreOperation<Boolean> {
}
// there's a file at the parent entry
throw new FileAlreadyExistsException(String.format(
"Can't make directory for path '%s' since it is a file.",
fPart));
throw new FileAlreadyExistsException(
String.format(
"Can't make directory for path '%s' since it is a file.",
fPart));
}
} catch (AccessDeniedException e) {
LOG.info("mkdirs({}}: Access denied when looking"
+ " for parent directory {}; skipping checks",
dir, fPart);
dir,
fPart);
LOG.debug("{}", e, e);
}
// if we get here there is no directory at the destination.
// so create one.
// Create the marker file, delete the parent entries
// if the filesystem isn't configured to retain them
callbacks.createFakeDirectory(dir, false);
return true;
}
/**

View File

@ -299,8 +299,11 @@ understands the risks.
| *Option* | *Meaning* | Since |
|----------|--------------------|:------|
| `create` | Create Performance | 3.4.1 |
| `mkdir` | Mkdir Performance | 3.4.1 |
The `create` flag has the same semantics as [`fs.s3a.create.performance`](#create-performance)
* The `create` flag has the same semantics as [`fs.s3a.create.performance`](#create-performance)
* The `mkdir` flag semantics are explained in [Mkdir Performance](#mkdir-performance)
### <a name="create-performance"></a> Create Performance `fs.s3a.create.performance`
@ -321,6 +324,22 @@ It may however result in
Use with care, and, ideally, enable versioning on the S3 store.
### <a name="mkdir-performance"></a> Mkdir Performance
`fs.s3a.performance.flag` flag option `mkdir`:
* Mkdir does not check whether the parent is directory or file.
This avoids the verification of the file status of the parent file
or the closest ancestor. Unlike the default mkdir operation, if the
parent is not a directory, the mkdir operation does not throw any
error.
This option can help with mkdir performance improvement but must be used
only if the person setting them understands the above-mentioned risk.
### <a name="threads"></a> Thread and connection pool settings.
Each S3A client interacting with a single bucket, as a single user, has its

View File

@ -22,11 +22,22 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractMkdirTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
/**
* Test dir operations on S3A.
*/
public class ITestS3AContractMkdir extends AbstractContractMkdirTest {
@Override
protected Configuration createConfiguration() {
Configuration conf = super.createConfiguration();
removeBaseAndBucketOverrides(conf,
FS_S3A_CREATE_PERFORMANCE);
return conf;
}
@Override
protected AbstractFSContract createContract(Configuration conf) {
return new S3AContract(conf);

View File

@ -0,0 +1,75 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.contract.s3a;
import org.junit.Test;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.AbstractContractMkdirTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile;
import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE;
import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
/**
* Test mkdir operations on S3A with create performance mode.
*/
public class ITestS3AContractMkdirWithCreatePerf extends AbstractContractMkdirTest {
@Override
protected Configuration createConfiguration() {
Configuration conf = super.createConfiguration();
removeBaseAndBucketOverrides(
conf,
FS_S3A_CREATE_PERFORMANCE,
FS_S3A_PERFORMANCE_FLAGS);
conf.setStrings(FS_S3A_PERFORMANCE_FLAGS,
"create,mkdir");
return conf;
}
@Override
protected AbstractFSContract createContract(Configuration conf) {
return new S3AContract(conf);
}
@Test
public void testMkdirOverParentFile() throws Throwable {
describe("try to mkdir where a parent is a file, should pass");
FileSystem fs = getFileSystem();
Path path = methodPath();
byte[] dataset = dataset(1024, ' ', 'z');
createFile(getFileSystem(), path, false, dataset);
Path child = new Path(path, "child-to-mkdir");
boolean childCreated = fs.mkdirs(child);
assertTrue("Child dir is created", childCreated);
assertIsFile(path);
byte[] bytes = ContractTestUtils.readDataset(getFileSystem(), path, dataset.length);
ContractTestUtils.compareByteArrays(dataset, bytes, dataset.length);
assertPathExists("mkdir failed", child);
assertDeleted(child, true);
}
}

View File

@ -13,12 +13,14 @@
*/
package org.apache.hadoop.fs.s3a.fileContext;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContextCreateMkdirBaseTest;
import org.apache.hadoop.fs.s3a.S3ATestUtils;
import org.junit.Before;
import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
/**
* Extends FileContextCreateMkdirBaseTest for a S3a FileContext.
*/
@ -26,8 +28,11 @@ public class ITestS3AFileContextCreateMkdir
extends FileContextCreateMkdirBaseTest {
@Before
public void setUp() throws IOException, Exception {
public void setUp() throws Exception {
Configuration conf = new Configuration();
removeBaseAndBucketOverrides(
conf,
FS_S3A_CREATE_PERFORMANCE);
fc = S3ATestUtils.createTestFileContext(conf);
super.setUp();
}

View File

@ -0,0 +1,67 @@
/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.fileContext;
import org.junit.Before;
import org.junit.Test;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContextCreateMkdirBaseTest;
import org.apache.hadoop.fs.s3a.S3ATestUtils;
import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE;
import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
/**
* Extends FileContextCreateMkdirBaseTest for a S3a FileContext with
* create performance mode.
*/
public class ITestS3AFileContextCreateMkdirCreatePerf
extends FileContextCreateMkdirBaseTest {
@Before
public void setUp() throws Exception {
Configuration conf = new Configuration();
removeBaseAndBucketOverrides(
conf,
FS_S3A_CREATE_PERFORMANCE,
FS_S3A_PERFORMANCE_FLAGS);
conf.setStrings(FS_S3A_PERFORMANCE_FLAGS,
"mkdir");
fc = S3ATestUtils.createTestFileContext(conf);
super.setUp();
}
@Override
public void tearDown() throws Exception {
if (fc != null) {
super.tearDown();
}
}
@Test
public void testMkdirRecursiveWithExistingFile() throws Exception {
intercept(
AssertionError.class,
MKDIR_FILE_PRESENT_ERROR,
"Dir creation should not have failed. "
+ "Creation performance mode is expected "
+ "to create dir without checking file "
+ "status of parent dir.",
super::testMkdirRecursiveWithExistingFile);
}
}