HADOOP-17628. Distcp contract test is really slow with ABFS and S3A; timing out. (#3240)

This patch cuts down the size of directory trees used for
distcp contract tests against object stores, so making
them much faster against distant/slow stores.

On abfs, the test only runs with -Dscale (as was the case for s3a already),
and has the larger scale test timeout.

After every test case, the FileSystem IOStatistics are logged,
to provide information about what IO is taking place and
what its performance is.

There are some test cases which upload files of 1+ MiB; you can
increase the size of the upload via the option
"scale.test.distcp.file.size.kb".
Set it to zero and the large file tests are skipped.

Contributed by Steve Loughran.
This commit is contained in:
Steve Loughran 2021-08-02 11:36:43 +01:00 committed by GitHub
parent efb3fa2bf5
commit ee466d4b40
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 150 additions and 105 deletions

View File

@ -943,8 +943,8 @@ public static List<Path> createFiles(final FileSystem fs,
final int fileCount,
final int dirCount) throws IOException {
return createDirsAndFiles(fs, destDir, depth, fileCount, dirCount,
new ArrayList<Path>(fileCount),
new ArrayList<Path>(dirCount));
new ArrayList<>(fileCount),
new ArrayList<>(dirCount));
}
/**

View File

@ -18,17 +18,12 @@
package org.apache.hadoop.fs.contract.s3a;
import java.io.FileNotFoundException;
import java.io.IOException;
import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.S3ATestConstants.SCALE_TEST_TIMEOUT_MILLIS;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.StorageStatistics;
import org.apache.hadoop.fs.s3a.FailureInjectionPolicy;
import org.apache.hadoop.tools.contract.AbstractContractDistCpTest;
/**
@ -59,42 +54,29 @@ protected Configuration createConfiguration() {
return newConf;
}
@Override
protected boolean shouldUseDirectWrite() {
return true;
}
@Override
protected S3AContract createContract(Configuration conf) {
return new S3AContract(conf);
}
/**
* Always inject the delay path in, so if the destination is inconsistent,
* and uses this key, inconsistency triggered.
* @param filepath path string in
* @return path on the remote FS for distcp
* @throws IOException IO failure
*/
@Override
protected Path path(final String filepath) throws IOException {
Path path = super.path(filepath);
return new Path(path, FailureInjectionPolicy.DEFAULT_DELAY_KEY_SUBSTRING);
}
@Override
public void testDirectWrite() throws Exception {
public void testDistCpWithIterator() throws Exception {
final long renames = getRenameOperationCount();
super.testDirectWrite();
assertEquals("Expected no renames for a direct write distcp", 0L,
getRenameOperationCount() - renames);
super.testDistCpWithIterator();
assertEquals("Expected no renames for a direct write distcp",
getRenameOperationCount(),
renames);
}
@Override
public void testNonDirectWrite() throws Exception {
final long renames = getRenameOperationCount();
try {
super.testNonDirectWrite();
} catch (FileNotFoundException e) {
// We may get this exception when data is written to a DELAY_LISTING_ME
// directory causing verification of the distcp success to fail if
// S3Guard is not enabled
}
assertEquals("Expected 2 renames for a non-direct write distcp", 2L,
getRenameOperationCount() - renames);
}

View File

@ -19,16 +19,24 @@
package org.apache.hadoop.fs.azurebfs.contract;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.azure.integration.AzureTestConstants;
import org.apache.hadoop.fs.azurebfs.services.AuthType;
import org.apache.hadoop.tools.contract.AbstractContractDistCpTest;
import org.junit.Assume;
import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.assumeScaleTestsEnabled;
/**
* Contract test for distCp operation.
*/
public class ITestAbfsFileSystemContractDistCp extends AbstractContractDistCpTest {
private final ABFSContractTestBinding binding;
@Override
protected int getTestTimeoutMillis() {
return AzureTestConstants.SCALE_TEST_TIMEOUT_MILLIS;
}
public ITestAbfsFileSystemContractDistCp() throws Exception {
binding = new ABFSContractTestBinding();
Assume.assumeTrue(binding.getAuthType() != AuthType.OAuth);
@ -38,6 +46,7 @@ public ITestAbfsFileSystemContractDistCp() throws Exception {
public void setup() throws Exception {
binding.setup();
super.setup();
assumeScaleTestsEnabled(binding.getRawConfiguration());
}
@Override

View File

@ -1,49 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.azurebfs.contract;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tools.contract.AbstractContractDistCpTest;
/**
* Contract test for secure distCP operation.
*/
public class ITestAbfsFileSystemContractSecureDistCp extends AbstractContractDistCpTest {

  /** Test binding supplying the ABFS configuration; created in the constructor, initialized in setup(). */
  private final ABFSContractTestBinding binding;

  /**
   * Instantiate the test, creating the ABFS contract binding.
   * @throws Exception binding creation failure.
   */
  public ITestAbfsFileSystemContractSecureDistCp() throws Exception {
    binding = new ABFSContractTestBinding();
  }

  /**
   * Set up the binding before the superclass setup, so the
   * configuration it supplies is initialized when the base
   * class builds its filesystems.
   * @throws Exception setup failure.
   */
  @Override
  public void setup() throws Exception {
    binding.setup();
    super.setup();
  }

  /**
   * @return the raw configuration supplied by the ABFS binding.
   */
  @Override
  protected Configuration createConfiguration() {
    return binding.getRawConfiguration();
  }

  /**
   * Create the contract under test.
   * The {@code true} flag presumably selects the secure-scheme variant
   * of the ABFS contract (matching the class javadoc "secure distCP") —
   * TODO confirm against the AbfsFileSystemContract constructor.
   * @param conf configuration to use.
   * @return the ABFS contract.
   */
  @Override
  protected AbfsFileSystemContract createContract(Configuration conf) {
    return new AbfsFileSystemContract(conf, true);
  }
}

View File

@ -18,7 +18,9 @@
package org.apache.hadoop.tools.contract;
import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_LOGGING_LEVEL_INFO;
import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.logIOStatisticsAtLevel;
import static org.apache.hadoop.tools.DistCpConstants.CONF_LABEL_DISTCP_JOB_ID;
import java.io.IOException;
@ -77,6 +79,22 @@ public abstract class AbstractContractDistCpTest
protected static final int MB = 1024 * 1024;
/**
* Default depth for a directory tree: {@value}.
*/
protected static final int DEFAULT_DEPTH = 3;
/**
* Default width for a directory tree: {@value}.
* Total dir size is
* <pre>
DEFAULT_WIDTH^DEFAULT_DEPTH
* </pre>
* So the duration of a test run grows rapidly with this value.
* This has very significant consequences for object storage runs.
*/
protected static final int DEFAULT_WIDTH = 2;
@Rule
public TestName testName = new TestName();
@ -154,13 +172,20 @@ public void setup() throws Exception {
localDir =
localFS.makeQualified(new Path(new Path(
GenericTestUtils.getTestDir().toURI()), testSubDir + "/local"));
localFS.delete(localDir, true);
mkdirs(localFS, localDir);
remoteDir = path(testSubDir + "/remote");
mkdirs(remoteFS, remoteDir);
Path testSubPath = path(testSubDir);
remoteDir = new Path(testSubPath, "remote");
// test teardown does this, but IDE-based test debugging can skip
// that teardown; this guarantees the initial state is clean
remoteFS.delete(remoteDir, true);
localFS.delete(localDir, true);
}
@Override
public void teardown() throws Exception {
// if remote FS supports IOStatistics log it.
logIOStatisticsAtLevel(LOG, IOSTATISTICS_LOGGING_LEVEL_INFO, getRemoteFS());
super.teardown();
}
/**
@ -325,6 +350,7 @@ private Job distCpUpdate(final Path srcDir, final Path destDir)
.withDeleteMissing(true)
.withSyncFolder(true)
.withCRC(true)
.withDirectWrite(shouldUseDirectWrite())
.withOverwrite(false)));
}
@ -378,6 +404,7 @@ public void testTrackDeepDirectoryStructureToRemote() throws Exception {
inputDirUnderOutputDir)
.withTrackMissing(trackDir)
.withSyncFolder(true)
.withDirectWrite(shouldUseDirectWrite())
.withOverwrite(false)));
lsR("tracked udpate", remoteFS, destDir);
@ -476,7 +503,7 @@ public void testSetJobId() throws Exception {
remoteFS.create(new Path(remoteDir, "file1")).close();
DistCpTestUtils
.assertRunDistCp(DistCpConstants.SUCCESS, remoteDir.toString(),
localDir.toString(), null, conf);
localDir.toString(), getDefaultCLIOptionsOrNull(), conf);
assertNotNull("DistCp job id isn't set",
conf.get(CONF_LABEL_DISTCP_JOB_ID));
}
@ -532,13 +559,15 @@ private Path distCpDeepDirectoryStructure(FileSystem srcFS,
*/
private void largeFiles(FileSystem srcFS, Path srcDir, FileSystem dstFS,
Path dstDir) throws Exception {
int fileSizeKb = conf.getInt(SCALE_TEST_DISTCP_FILE_SIZE_KB,
getDefaultDistCPSizeKb());
if (fileSizeKb < 1) {
skip("File size in " + SCALE_TEST_DISTCP_FILE_SIZE_KB + " is zero");
}
initPathFields(srcDir, dstDir);
Path largeFile1 = new Path(inputDir, "file1");
Path largeFile2 = new Path(inputDir, "file2");
Path largeFile3 = new Path(inputDir, "file3");
mkdirs(srcFS, inputDir);
int fileSizeKb = conf.getInt(SCALE_TEST_DISTCP_FILE_SIZE_KB,
DEFAULT_DISTCP_SIZE_KB);
int fileSizeMb = fileSizeKb / 1024;
getLogger().info("{} with file size {}", testName.getMethodName(), fileSizeMb);
byte[] data1 = dataset((fileSizeMb + 1) * MB, 33, 43);
@ -549,22 +578,37 @@ private void largeFiles(FileSystem srcFS, Path srcDir, FileSystem dstFS,
createFile(srcFS, largeFile3, true, data3);
Path target = new Path(dstDir, "outputDir");
runDistCp(inputDir, target);
ContractTestUtils.assertIsDirectory(dstFS, target);
verifyFileContents(dstFS, new Path(target, "inputDir/file1"), data1);
verifyFileContents(dstFS, new Path(target, "inputDir/file2"), data2);
verifyFileContents(dstFS, new Path(target, "inputDir/file3"), data3);
}
/**
* Override point. What is the default distcp size
* for large files if not overridden by
* {@link #SCALE_TEST_DISTCP_FILE_SIZE_KB}.
* If 0 then, unless overridden in the configuration,
* the large file tests will not run.
* @return file size.
*/
protected int getDefaultDistCPSizeKb() {
return DEFAULT_DISTCP_SIZE_KB;
}
/**
* Executes DistCp and asserts that the job finished successfully.
*
* The choice of direct/indirect is based on the value of
* {@link #shouldUseDirectWrite()}.
* @param src source path
* @param dst destination path
* @throws Exception if there is a failure
*/
private void runDistCp(Path src, Path dst) throws Exception {
runDistCp(buildWithStandardOptions(
new DistCpOptions.Builder(Collections.singletonList(src), dst)));
if (shouldUseDirectWrite()) {
runDistCpDirectWrite(src, dst);
} else {
runDistCpWithRename(src, dst);
}
}
/**
@ -607,6 +651,9 @@ private static void mkdirs(FileSystem fs, Path dir) throws Exception {
@Test
public void testDirectWrite() throws Exception {
describe("copy file from local to remote using direct write option");
if (shouldUseDirectWrite()) {
skip("not needed as all other tests use the -direct option.");
}
directWrite(localFS, localDir, remoteFS, remoteDir, true);
}
@ -623,8 +670,6 @@ public void testDistCpWithIterator() throws Exception {
Path source = new Path(remoteDir, "src");
Path dest = new Path(localDir, "dest");
dest = localFS.makeQualified(dest);
mkdirs(remoteFS, source);
verifyPathExists(remoteFS, "", source);
GenericTestUtils
.createFiles(remoteFS, source, getDepth(), getWidth(), getWidth());
@ -632,8 +677,9 @@ public void testDistCpWithIterator() throws Exception {
GenericTestUtils.LogCapturer log =
GenericTestUtils.LogCapturer.captureLogs(SimpleCopyListing.LOG);
String options = "-useiterator -update -delete" + getDefaultCLIOptions();
DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(),
dest.toString(), "-useiterator -update -delete", conf);
dest.toString(), options, conf);
// Check the target listing was also done using iterator.
Assertions.assertThat(log.getOutput()).contains(
@ -644,11 +690,11 @@ public void testDistCpWithIterator() throws Exception {
}
public int getDepth() {
return 3;
return DEFAULT_DEPTH;
}
public int getWidth() {
return 10;
return DEFAULT_WIDTH;
}
private int getTotalFiles() {
@ -659,6 +705,41 @@ private int getTotalFiles() {
return totalFiles;
}
/**
* Override point: should direct write always be used?
* false by default; enable for stores where rename is slow.
* @return true if direct write should be used in all tests.
*/
protected boolean shouldUseDirectWrite() {
return false;
}
/**
* Return the default options for distcp, including,
* if {@link #shouldUseDirectWrite()} is true,
* the -direct option.
* Append or prepend this to string CLIs.
* @return default options.
*/
protected String getDefaultCLIOptions() {
return shouldUseDirectWrite()
? " -direct "
: "";
}
/**
* Return the default options for distcp, including,
* if {@link #shouldUseDirectWrite()} is true,
* the -direct option; returns null if there are
* no defaults.
* @return default options.
*/
protected String getDefaultCLIOptionsOrNull() {
return shouldUseDirectWrite()
? " -direct "
: null;
}
/**
* Executes a test with support for using direct write option.
*
@ -683,7 +764,7 @@ private void directWrite(FileSystem srcFS, Path srcDir, FileSystem dstFS,
if (directWrite) {
runDistCpDirectWrite(inputDir, target);
} else {
runDistCp(inputDir, target);
runDistCpWithRename(inputDir, target);
}
ContractTestUtils.assertIsDirectory(dstFS, target);
lsR("Destination tree after distcp", dstFS, target);
@ -709,6 +790,21 @@ private Job runDistCpDirectWrite(final Path srcDir, final Path destDir)
Collections.singletonList(srcDir), destDir)
.withDirectWrite(true)));
}
/**
* Run distcp srcDir destDir.
* @param srcDir local source directory
* @param destDir remote destination directory
* @return the completed job
* @throws Exception any failure.
*/
private Job runDistCpWithRename(Path srcDir, final Path destDir)
throws Exception {
describe("\nDistcp from " + srcDir + " to " + destDir);
return runDistCp(buildWithStandardOptions(
new DistCpOptions.Builder(
Collections.singletonList(srcDir), destDir)
.withDirectWrite(false)));
}
@Test
public void testDistCpWithFile() throws Exception {
@ -718,7 +814,6 @@ public void testDistCpWithFile() throws Exception {
Path dest = new Path(localDir, "file");
dest = localFS.makeQualified(dest);
mkdirs(remoteFS, remoteDir);
mkdirs(localFS, localDir);
int len = 4;
@ -729,7 +824,7 @@ public void testDistCpWithFile() throws Exception {
verifyPathExists(localFS, "", localDir);
DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(),
dest.toString(), null, conf);
dest.toString(), getDefaultCLIOptionsOrNull(), conf);
Assertions
.assertThat(RemoteIterators.toList(localFS.listFiles(dest, true)))
@ -739,15 +834,12 @@ public void testDistCpWithFile() throws Exception {
@Test
public void testDistCpWithUpdateExistFile() throws Exception {
describe("Now update an exist file.");
describe("Now update an existing file.");
Path source = new Path(remoteDir, "file");
Path dest = new Path(localDir, "file");
dest = localFS.makeQualified(dest);
mkdirs(remoteFS, remoteDir);
mkdirs(localFS, localDir);
int len = 4;
int base = 0x40;
byte[] block = dataset(len, base, base + len);
@ -758,7 +850,7 @@ public void testDistCpWithUpdateExistFile() throws Exception {
verifyPathExists(remoteFS, "", source);
verifyPathExists(localFS, "", dest);
DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(),
dest.toString(), "-delete -update", conf);
dest.toString(), "-delete -update" + getDefaultCLIOptions(), conf);
Assertions.assertThat(RemoteIterators.toList(localFS.listFiles(dest, true)))
.hasSize(1);

View File

@ -30,8 +30,9 @@
* Verifies that the HDFS passes all the tests in
* {@link AbstractContractDistCpTest}.
* As such, it acts as an in-module validation of this contract test itself.
* It does skip the large file test cases for speed.
*/
public class OptionalTestHDFSContractDistCp extends AbstractContractDistCpTest {
public class TestHDFSContractDistCp extends AbstractContractDistCpTest {
@BeforeClass
public static void createCluster() throws IOException {
@ -47,4 +48,14 @@ public static void teardownCluster() throws IOException {
protected AbstractFSContract createContract(Configuration conf) {
return new HDFSContract(conf);
}
/**
* Turn off the large file tests as they are very slow and there
* are many other distcp to HDFS tests which verify such things.
* @return 0
*/
@Override
protected int getDefaultDistCPSizeKb() {
return 0;
}
}