From 28d190b90416e9c4be402e7ef3d3c361a4f4a5a0 Mon Sep 17 00:00:00 2001 From: Mukund Thakur Date: Fri, 25 Aug 2023 12:23:17 -0500 Subject: [PATCH] HADOOP-18845. Add ability to configure s3 connection ttl using fs.s3a.connection.ttl (#5948) Contributed By: Mukund Thakur --- .../org/apache/hadoop/fs/s3a/Constants.java | 11 +++++++ .../org/apache/hadoop/fs/s3a/Invoker.java | 5 ++-- .../org/apache/hadoop/fs/s3a/S3AUtils.java | 2 ++ .../tools/hadoop-aws/troubleshooting_s3a.md | 20 +++++++++++++ .../hadoop/fs/s3a/ITestS3AConfiguration.java | 30 +++++++++++++++++++ 5 files changed, 66 insertions(+), 2 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 6c1d6371d1..4e35dc1731 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -154,6 +154,17 @@ private Constants() { public static final String MAXIMUM_CONNECTIONS = "fs.s3a.connection.maximum"; public static final int DEFAULT_MAXIMUM_CONNECTIONS = 96; + /** + * Configuration option to configure expiration time of + * s3 http connection from the connection pool in milliseconds: {@value}. + */ + public static final String CONNECTION_TTL = "fs.s3a.connection.ttl"; + + /** + * Default value for {@code CONNECTION_TTL}: {@value}. + */ + public static final long DEFAULT_CONNECTION_TTL = 5 * 60_000; + // connect to s3 over ssl? public static final String SECURE_CONNECTIONS = "fs.s3a.connection.ssl.enabled"; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java index 279bfeba98..67c8e7d809 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java @@ -462,7 +462,7 @@ public T retryUntranslated( do { try { if (retryCount > 0) { - LOG.debug("retry #{}", retryCount); + LOG.debug("{} retry #{}", text, retryCount); } // execute the operation, returning if successful return operation.apply(); @@ -471,7 +471,8 @@ public T retryUntranslated( } // you only get here if the operation didn't complete // normally, hence caught != null - + LOG.debug("{} ; {}, ", text, caught.toString()); + LOG.trace("", caught); // translate the exception into an IOE for the retry logic IOException translated; if (caught instanceof IOException) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index e22433322c..27f061482c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -1299,6 +1299,8 @@ public static void initConnectionSettings(Configuration conf, ClientConfiguration awsConf) throws IOException { awsConf.setMaxConnections(intOption(conf, MAXIMUM_CONNECTIONS, DEFAULT_MAXIMUM_CONNECTIONS, 1)); + awsConf.setConnectionTTL(longOption(conf, CONNECTION_TTL, + DEFAULT_CONNECTION_TTL, -1)); initProtocolSettings(conf, awsConf); awsConf.setMaxErrorRetry(intOption(conf, MAX_ERROR_RETRIES, DEFAULT_MAX_ERROR_RETRIES, 0)); diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md index 3cd3bb43c5..1ead08081f 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md @@ -1782,6 +1782,26 @@ will attempt to retry the operation; it may just be a transient event. If there are many such exceptions in logs, it may be a symptom of connectivity or network problems. +The above error could be because of a stale http connections. The default value in AWS +SDK is set to -1 (infinite) which means the connection will be reused indefinitely. +We have introduced a new config `fs.s3a.connection.ttl` to configure this. +Tuning this setting down (together with an appropriately-low setting for Java's DNS cache TTL) +ensures that your application will quickly rotate over to new IP addresses when the +service begins announcing them through DNS, at the cost of having to re-establish new +connections more frequently. + +```xml + + fs.s3a.connection.ttl + 300000 + + Expiration time for a connection in the connection pool in milliseconds. + When a connection is retrieved from the connection pool, + this parameter is checked to see if the connection can be reused. + Default value is 5 minutes. + + +``` ### `AWSBadRequestException` IllegalLocationConstraintException/The unspecified location constraint is incompatible ``` diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java index 26d00bc7d3..ff75f6e261 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java @@ -31,6 +31,8 @@ import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.s3native.S3xLoginHelper; import org.apache.hadoop.test.GenericTestUtils; + +import org.assertj.core.api.Assertions; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; @@ -511,6 +513,34 @@ public void testConfOptionPropagationToFS() throws Exception { assertOptionEquals(updated, "fs.s3a.propagation", "propagated"); } + @Test(timeout = 10_000L) + public void testConnectTtlPropagation() throws Exception { + Configuration config = new Configuration(false); + ClientConfiguration awsConf = new ClientConfiguration(); + initConnectionSettings(config, awsConf); + Assertions.assertThat(awsConf.getConnectionTTL()) + .describedAs("connection ttl should be set to default value as" + + " %s is not set", CONNECTION_TTL) + .isEqualTo(DEFAULT_CONNECTION_TTL); + long connectionTtlTestVal = 1000; + config.setLong(CONNECTION_TTL, connectionTtlTestVal); + initConnectionSettings(config, awsConf); + Assertions.assertThat(awsConf.getConnectionTTL()) + .describedAs("%s not propagated to aws conf", CONNECTION_TTL) + .isEqualTo(connectionTtlTestVal); + + long connectionTtlTestVal1 = -1; + config.setLong(CONNECTION_TTL, connectionTtlTestVal1); + initConnectionSettings(config, awsConf); + Assertions.assertThat(awsConf.getConnectionTTL()) + .describedAs("%s not propagated to aws conf", CONNECTION_TTL) + .isEqualTo(connectionTtlTestVal1); + + long connectionTtlTestVal2 = -100; + config.setLong(CONNECTION_TTL, connectionTtlTestVal2); + intercept(IllegalArgumentException.class, () -> initConnectionSettings(config, awsConf)); + } + @Test(timeout = 10_000L) public void testS3SpecificSignerOverride() throws IOException { ClientConfiguration clientConfiguration = null;