From 5f312a0d854f8d2c84099bb44783f07d84602625 Mon Sep 17 00:00:00 2001 From: Sumangala Date: Wed, 9 Dec 2020 15:35:03 +0530 Subject: [PATCH] HADOOP-17422: ABFS: Set default ListMaxResults to max server limit (#2535) Contributed by Sumangala Patki TEST RESULTS: namespace.enabled=true auth.type=SharedKey ------------------- $mvn -T 1C -Dparallel-tests=abfs -Dscale -DtestsThreadCount=8 clean verify Tests run: 90, Failures: 0, Errors: 0, Skipped: 0 Tests run: 462, Failures: 0, Errors: 0, Skipped: 24 Tests run: 208, Failures: 0, Errors: 0, Skipped: 24 namespace.enabled=true auth.type=OAuth ------------------- $mvn -T 1C -Dparallel-tests=abfs -Dscale -DtestsThreadCount=8 clean verify Tests run: 90, Failures: 0, Errors: 0, Skipped: 0 Tests run: 462, Failures: 0, Errors: 0, Skipped: 70 Tests run: 208, Failures: 0, Errors: 0, Skipped: 141 (cherry picked from commit a35fc3871b01d8a3a375f3ae0e330b55a1d9009f) --- .../fs/azurebfs/constants/FileSystemConfigurations.java | 2 +- hadoop-tools/hadoop-azure/src/site/markdown/abfs.md | 2 +- .../fs/azurebfs/ITestAzureBlobFileSystemListStatus.java | 8 +++++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java index 49fc58ba56..27dafd0b1f 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java @@ -63,7 +63,7 @@ public final class FileSystemConfigurations { public static final int MAX_BUFFER_SIZE = 100 * ONE_MB; // 100 MB public static final long MAX_AZURE_BLOCK_SIZE = 256 * 1024 * 1024L; // changing default abfs blocksize to 256MB public static final String AZURE_BLOCK_LOCATION_HOST_DEFAULT = "localhost"; - public static final int DEFAULT_AZURE_LIST_MAX_RESULTS = 500; + public static final int DEFAULT_AZURE_LIST_MAX_RESULTS = 5000; public static final int MAX_CONCURRENT_READ_THREADS = 12; public static final int MAX_CONCURRENT_WRITE_THREADS = 8; diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md index a4188111e0..0777f9b43b 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md @@ -848,7 +848,7 @@ Please refer the following links for further information. listStatus API fetches the FileStatus information from server in a page by page manner. The config `fs.azure.list.max.results` used to set the maxResults URI param which sets the pagesize(maximum results per call). The value should - be > 0. By default this will be 500. Server has a maximum value for this + be > 0. By default this will be 5000. Server has a maximum value for this parameter as 5000. So even if the config is above 5000 the response will only contain 5000 entries. Please refer the following link for further information. https://docs.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/path/list diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java index 25a1567926..31f92d2bd3 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java @@ -29,12 +29,15 @@ import org.junit.Test; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_LIST_MAX_RESULTS; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertMkdirs; import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertPathExists; @@ -55,7 +58,10 @@ public ITestAzureBlobFileSystemListStatus() throws Exception { @Test public void testListPath() throws Exception { - final AzureBlobFileSystem fs = getFileSystem(); + Configuration config = new Configuration(this.getRawConfiguration()); + config.set(AZURE_LIST_MAX_RESULTS, "5000"); + final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem + .newInstance(getFileSystem().getUri(), config); final List> tasks = new ArrayList<>(); ExecutorService es = Executors.newFixedThreadPool(10);