From 37e0828e76934c70988c897ab43ff570fcbbafc4 Mon Sep 17 00:00:00 2001 From: Brian Loss Date: Wed, 28 Jul 2021 15:22:58 -0400 Subject: [PATCH] HADOOP-17811: ABFS ExponentialRetryPolicy doesn't pick up configuration values (#3221) Contributed by Brian Loss. Change-Id: I5f24196d1d02de91336c3679abaf8d55cfaed746 --- .../fs/azurebfs/AzureBlobFileSystemStore.java | 2 +- .../services/ExponentialRetryPolicy.java | 11 +++++ .../hadoop-azure/src/site/markdown/abfs.md | 28 +++++++++++-- .../src/site/markdown/testing_azure.md | 4 +- .../services/TestExponentialRetryPolicy.java | 40 ++++++++++++++++--- 5 files changed, 74 insertions(+), 11 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index be9a9f265f..54e2cd40c6 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -1479,7 +1479,7 @@ private void initializeClient(URI uri, String fileSystemName, private AbfsClientContext populateAbfsClientContext() { return new AbfsClientContextBuilder() .withExponentialRetryPolicy( - new ExponentialRetryPolicy(abfsConfiguration.getMaxIoRetries())) + new ExponentialRetryPolicy(abfsConfiguration)) .withAbfsCounters(abfsCounters) .withAbfsPerfTracker(abfsPerfTracker) .build(); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java index 9a75c78aa0..89d99471a8 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java @@ -21,6 +21,7 @@ import java.util.Random; import java.net.HttpURLConnection; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** @@ -89,6 +90,16 @@ public ExponentialRetryPolicy(final int maxIoRetries) { DEFAULT_CLIENT_BACKOFF); } + /** + * Initializes a new instance of the {@link ExponentialRetryPolicy} class. + * + * @param conf The {@link AbfsConfiguration} from which to retrieve retry configuration. + */ + public ExponentialRetryPolicy(AbfsConfiguration conf) { + this(conf.getMaxIoRetries(), conf.getMinBackoffIntervalMilliseconds(), conf.getMaxBackoffIntervalMilliseconds(), + conf.getBackoffIntervalMilliseconds()); + } + /** * Initializes a new instance of the {@link ExponentialRetryPolicy} class. * diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md index 8724b97ab3..9e11858abe 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md @@ -338,7 +338,7 @@ with the following configurations. retries. Default value is 5. * `fs.azure.oauth.token.fetch.retry.min.backoff.interval`: Minimum back-off interval. Added to the retry interval computed from delta backoff. By - default this si set as 0. Set the interval in milli seconds. + default this is set as 0. Set the interval in milli seconds. * `fs.azure.oauth.token.fetch.retry.max.backoff.interval`: Maximum back-off interval. Default value is 60000 (sixty seconds). Set the interval in milli seconds. @@ -778,9 +778,31 @@ The following configs are related to read and write operations. `fs.azure.io.retry.max.retries`: Sets the number of retries for IO operations. Currently this is used only for the server call retry logic. Used within -AbfsClient class as part of the ExponentialRetryPolicy. The value should be +`AbfsClient` class as part of the ExponentialRetryPolicy. The value should be greater than or equal to 0. +`fs.azure.io.retry.min.backoff.interval`: Sets the minimum backoff interval for +retries of IO operations. Currently this is used only for the server call retry +logic. Used within `AbfsClient` class as part of the ExponentialRetryPolicy. This +value indicates the smallest interval (in milliseconds) to wait before retrying +an IO operation. The default value is 3000 (3 seconds). + +`fs.azure.io.retry.max.backoff.interval`: Sets the maximum backoff interval for +retries of IO operations. Currently this is used only for the server call retry +logic. Used within `AbfsClient` class as part of the ExponentialRetryPolicy. This +value indicates the largest interval (in milliseconds) to wait before retrying +an IO operation. The default value is 30000 (30 seconds). + +`fs.azure.io.retry.backoff.interval`: Sets the default backoff interval for +retries of IO operations. Currently this is used only for the server call retry +logic. Used within `AbfsClient` class as part of the ExponentialRetryPolicy. This +value is used to compute a random delta between 80% and 120% of the specified +value. This random delta is then multiplied by an exponent of the current IO +retry number (i.e., the default is multiplied by `2^(retryNum - 1)`) and then +contstrained within the range of [`fs.azure.io.retry.min.backoff.interval`, +`fs.azure.io.retry.max.backoff.interval`] to determine the amount of time to +wait before the next IO retry attempt. The default value is 3000 (3 seconds). + `fs.azure.write.request.size`: To set the write buffer size. Specify the value in bytes. The value should be between 16384 to 104857600 both inclusive (16 KB to 100 MB). The default value will be 8388608 (8 MB). @@ -837,7 +859,7 @@ when there are too many writes from the same process. ### Security Options `fs.azure.always.use.https`: Enforces to use HTTPS instead of HTTP when the flag -is made true. Irrespective of the flag, AbfsClient will use HTTPS if the secure +is made true. Irrespective of the flag, `AbfsClient` will use HTTPS if the secure scheme (ABFSS) is used or OAuth is used for authentication. By default this will be set to true. diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md b/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md index cf3b234445..933f86be3e 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md @@ -448,7 +448,7 @@ use requires the presence of secret credentials, where tests may be slow, and where finding out why something failed from nothing but the test output is critical. -#### Subclasses Existing Shared Base Blasses +#### Subclasses Existing Shared Base Classes There are a set of base classes which should be extended for Azure tests and integration tests. @@ -602,7 +602,7 @@ various test combinations, it will: 2. Run tests for all combinations 3. Summarize results across all the test combination runs. -As a pre-requiste step, fill config values for test accounts and credentials +As a pre-requisite step, fill config values for test accounts and credentials needed for authentication in `src/test/resources/azure-auth-keys.xml.template` and rename as `src/test/resources/azure-auth-keys.xml`. diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java index e10419f148..0f8dc55aa1 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java @@ -18,6 +18,11 @@ package org.apache.hadoop.fs.azurebfs.services; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_BACKOFF_INTERVAL; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_MAX_BACKOFF_INTERVAL; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_MAX_IO_RETRIES; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_MIN_BACKOFF_INTERVAL; + import java.util.Random; import org.junit.Assert; @@ -32,7 +37,6 @@ * Unit test TestExponentialRetryPolicy. */ public class TestExponentialRetryPolicy extends AbstractAbfsIntegrationTest { - private final int maxRetryCount = 30; private final int noRetryCount = 0; private final int retryCount = new Random().nextInt(maxRetryCount); @@ -57,12 +61,38 @@ public void testDifferentMaxIORetryCount() throws Exception { @Test public void testDefaultMaxIORetryCount() throws Exception { AbfsConfiguration abfsConfig = getAbfsConfig(); - Assert.assertTrue( + Assert.assertEquals( String.format("default maxIORetry count is %s.", maxRetryCount), - abfsConfig.getMaxIoRetries() == maxRetryCount); + maxRetryCount, abfsConfig.getMaxIoRetries()); testMaxIOConfig(abfsConfig); } + @Test + public void testAbfsConfigConstructor() throws Exception { + // Ensure we choose expected values that are not defaults + ExponentialRetryPolicy template = new ExponentialRetryPolicy( + getAbfsConfig().getMaxIoRetries()); + int testModifier = 1; + int expectedMaxRetries = template.getRetryCount() + testModifier; + int expectedMinBackoff = template.getMinBackoff() + testModifier; + int expectedMaxBackoff = template.getMaxBackoff() + testModifier; + int expectedDeltaBackoff = template.getDeltaBackoff() + testModifier; + + Configuration config = new Configuration(this.getRawConfiguration()); + config.setInt(AZURE_MAX_IO_RETRIES, expectedMaxRetries); + config.setInt(AZURE_MIN_BACKOFF_INTERVAL, expectedMinBackoff); + config.setInt(AZURE_MAX_BACKOFF_INTERVAL, expectedMaxBackoff); + config.setInt(AZURE_BACKOFF_INTERVAL, expectedDeltaBackoff); + + ExponentialRetryPolicy policy = new ExponentialRetryPolicy( + new AbfsConfiguration(config, "dummyAccountName")); + + Assert.assertEquals("Max retry count was not set as expected.", expectedMaxRetries, policy.getRetryCount()); + Assert.assertEquals("Min backoff interval was not set as expected.", expectedMinBackoff, policy.getMinBackoff()); + Assert.assertEquals("Max backoff interval was not set as expected.", expectedMaxBackoff, policy.getMaxBackoff()); + Assert.assertEquals("Delta backoff interval was not set as expected.", expectedDeltaBackoff, policy.getDeltaBackoff()); + } + private AbfsConfiguration getAbfsConfig() throws Exception { Configuration config = new Configuration(this.getRawConfiguration()); @@ -81,8 +111,8 @@ private void testMaxIOConfig(AbfsConfiguration abfsConfig) { localRetryCount++; } - Assert.assertTrue( + Assert.assertEquals( "When all retries are exhausted, the retryCount will be same as max configured", - localRetryCount == abfsConfig.getMaxIoRetries()); + abfsConfig.getMaxIoRetries(), localRetryCount); } }