From 5b7f68ac76c091740c7e338de86a083b758d38ef Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Thu, 24 Jun 2021 16:37:27 +0100 Subject: [PATCH] HADOOP-17771. S3AFS creation fails "Unable to find a region via the region provider chain." (#3133) This addresses the regression in Hadoop 3.3.1 where if no S3 endpoint is set in fs.s3a.endpoint, S3A filesystem creation may fail on non-EC2 deployments, depending on the local host environment setup. * If fs.s3a.endpoint is empty/null, and fs.s3a.endpoint.region is null, the region is set to "us-east-1". * If fs.s3a.endpoint.region is explicitly set to "" then the client falls back to the SDK region resolution chain; this works on EC2 * Details in troubleshooting.md, including a workaround for Hadoop-3.3.1+ * Also contains some minor restructuring of troubleshooting.md Contributed by Steve Loughran. --- .../org/apache/hadoop/fs/s3a/Constants.java | 6 + .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 46 +- .../hadoop/fs/s3a/impl/InternalConstants.java | 5 + .../site/markdown/tools/hadoop-aws/index.md | 12 +- .../tools/hadoop-aws/troubleshooting_s3a.md | 622 ++++++++++-------- .../hadoop/fs/s3a/ITestS3AEndpointRegion.java | 99 +++ 6 files changed, 526 insertions(+), 264 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 8dc6bba1cc..b741bc2301 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -1087,4 +1087,10 @@ private Constants() { */ public static final String AWS_REGION = "fs.s3a.endpoint.region"; + /** + * The special S3 region which can be used to talk to any bucket. + * Value {@value}. + */ + public static final String AWS_S3_CENTRAL_REGION = "us-east-1"; + } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 6e84497cb9..7dc920ce50 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -22,6 +22,7 @@ import java.net.URI; import com.amazonaws.ClientConfiguration; +import com.amazonaws.SdkClientException; import com.amazonaws.client.builder.AwsClientBuilder; import com.amazonaws.handlers.RequestHandler2; import com.amazonaws.services.s3.AmazonS3; @@ -41,10 +42,13 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.s3a.statistics.impl.AwsStatisticsCollector; +import org.apache.hadoop.fs.store.LogExactlyOnce; import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; +import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CENTRAL_REGION; import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING; import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING_DEFAULT; +import static org.apache.hadoop.fs.s3a.S3AUtils.translateException; /** * The default {@link S3ClientFactory} implementation. @@ -64,6 +68,19 @@ public class DefaultS3ClientFactory extends Configured protected static final Logger LOG = LoggerFactory.getLogger(DefaultS3ClientFactory.class); + /** + * A one-off warning of default region chains in use. + */ + private static final LogExactlyOnce WARN_OF_DEFAULT_REGION_CHAIN = + new LogExactlyOnce(LOG); + + /** + * Warning message printed when the SDK Region chain is in use. + */ + private static final String SDK_REGION_CHAIN_IN_USE = + "S3A filesystem client is using" + + " the SDK region resolution chain."; + /** * Create the client by preparing the AwsConf configuration * and then invoking {@code buildAmazonS3Client()}. @@ -94,9 +111,14 @@ public AmazonS3 createS3Client( awsConf.setUserAgentSuffix(parameters.getUserAgentSuffix()); } - return buildAmazonS3Client( - awsConf, - parameters); + try { + return buildAmazonS3Client( + awsConf, + parameters); + } catch (SdkClientException e) { + // SDK refused to build. + throw translateException("creating AWS S3 client", uri.toString(), e); + } } /** @@ -109,6 +131,7 @@ public AmazonS3 createS3Client( * @param awsConf AWS configuration * @param parameters parameters * @return new AmazonS3 client + * @throws SdkClientException if the configuration is invalid. */ protected AmazonS3 buildAmazonS3Client( final ClientConfiguration awsConf, @@ -141,6 +164,21 @@ protected AmazonS3 buildAmazonS3Client( // no idea what the endpoint is, so tell the SDK // to work it out at the cost of an extra HEAD request b.withForceGlobalBucketAccessEnabled(true); + // HADOOP-17771 force set the region so the build process doesn't halt. + String region = getConf().getTrimmed(AWS_REGION, AWS_S3_CENTRAL_REGION); + LOG.debug("fs.s3a.endpoint.region=\"{}\"", region); + if (!region.isEmpty()) { + // there's either an explicit region or we have fallen back + // to the central one. + LOG.debug("Using default endpoint; setting region to {}", region); + b.setRegion(region); + } else { + // no region. + // allow this if people really want it; it is OK to rely on this + // when deployed in EC2. + WARN_OF_DEFAULT_REGION_CHAIN.warn(SDK_REGION_CHAIN_IN_USE); + LOG.debug(SDK_REGION_CHAIN_IN_USE); + } } final AmazonS3 client = b.build(); return client; @@ -206,7 +244,7 @@ protected static AmazonS3 configureAmazonS3Client(AmazonS3 s3, createEndpointConfiguration( final String endpoint, final ClientConfiguration awsConf, String awsRegion) { - LOG.debug("Creating endpoint configuration for {}", endpoint); + LOG.debug("Creating endpoint configuration for \"{}\"", endpoint); if (endpoint == null || endpoint.isEmpty()) { // the default endpoint...we should be using null at this point. LOG.debug("Using default endpoint -no need to generate a configuration"); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java index d6142f49c9..cf962b87a4 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java @@ -121,4 +121,9 @@ private InternalConstants() { */ public static final int DEFAULT_UPLOAD_PART_COUNT_LIMIT = 10000; + /** + * The system property used by the AWS SDK to identify the region. + */ + public static final String AWS_REGION_SYSPROP = "aws.region"; + } diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index aff7694c52..48b99caf65 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -438,6 +438,12 @@ you'll need to remove the `profile` prefix from the AWS configuration section he aws_session_token = ... aws_security_token = ... ``` +Note: + +1. The `region` setting is only used if `fs.s3a.endpoint.region` is set to the empty string. +1. For the credentials to be available to applications running in a Hadoop cluster, the + configuration files MUST be in the `~/.aws/` directory on the local filesystem in + all hosts in the cluster. ### Using Session Credentials with `TemporaryAWSCredentialsProvider` @@ -802,8 +808,10 @@ options are covered in [Testing](./testing.md). fs.s3a.endpoint.region AWS S3 region for a bucket, which bypasses the parsing of - fs.s3a.endpoint to know the region. Would be helpful in avoiding errors - while using privateLink URL and explicitly set the bucket region. + fs.s3a.endpoint to know the region. Would be helpful in avoiding errors + while using privateLink URL and explicitly set the bucket region. + If set to a blank string (or 1+ space), falls back to the + (potentially brittle) SDK region resolution process. diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md index d91607d3fd..30047edfb5 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md @@ -18,11 +18,17 @@ ## Introduction -Common problems working with S3 are +Common problems working with S3 are: -1. Classpath setup -1. Authentication -1. Incorrect configuration +1. [Classpath setup](#classpath) +1. [Authentication](#authentication) +1. [Access Denial](#access_denied) +1. [Connectivity Problems](#connectivity) +1. [File System Semantics](#semantics) +1. [Encryption](#encryption) +1. [Other Errors](#other) + +This document also includes some [best pactises](#best) to aid troubleshooting. Troubleshooting IAM Assumed Roles is covered in its @@ -572,7 +578,7 @@ S3 sts endpoint and region like the following: ## Connectivity Problems -### Error message "The bucket you are attempting to access must be addressed using the specified endpoint" +### Error "The bucket you are attempting to access must be addressed using the specified endpoint" This surfaces when `fs.s3a.endpoint` is configured to use an S3 service endpoint which is neither the original AWS one, `s3.amazonaws.com` , nor the one where @@ -611,6 +617,101 @@ can be used: Using the explicit endpoint for the region is recommended for speed and to use the V4 signing API. +### `Unable to find a region via the region provider chain` + +S3A client creation fails, possibly after a pause of some seconds. + +This failure surfaces when _all_ the following conditions are met: + +1. Deployment outside EC2. +1. `fs.s3a.endpoint` is unset. +1. `fs.s3a.endpoint.region` is set to `""`. (Hadoop 3.3.2+ only) +1. Without the file `~/.aws/config` existing or without a region set in it. +1. Without the JVM system property `aws.region` declaring a region. +1. Without the environment variable `AWS_REGION` declaring a region. + +Stack trace (Hadoop 3.3.1): +``` +Caused by: com.amazonaws.SdkClientException: Unable to find a region via the region provider chain. + Must provide an explicit region in the builder or setup environment to supply a region. + at com.amazonaws.client.builder.AwsClientBuilder.setRegion(AwsClientBuilder.java:462) + at com.amazonaws.client.builder.AwsClientBuilder.configureMutableProperties(AwsClientBuilder.java:424) + at com.amazonaws.client.builder.AwsSyncClientBuilder.build(AwsSyncClientBuilder.java:46) + at org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.buildAmazonS3Client(DefaultS3ClientFactory.java:145) + at org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.createS3Client(DefaultS3ClientFactory.java:97) + at org.apache.hadoop.fs.s3a.S3AFileSystem.bindAWSClient(S3AFileSystem.java:788) + at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:478) +``` + +Log and stack trace on later releases, with +"S3A filesystem client is using the SDK region resolution chain." +warning that the SDK resolution chain is in use: + +``` +2021-06-23 19:56:55,971 [main] WARN s3a.DefaultS3ClientFactory (LogExactlyOnce.java:warn(39)) - + S3A filesystem client is using the SDK region resolution chain. + +2021-06-23 19:56:56,073 [main] WARN fs.FileSystem (FileSystem.java:createFileSystem(3464)) - + Failed to initialize fileystem s3a://osm-pds/planet: + org.apache.hadoop.fs.s3a.AWSClientIOException: creating AWS S3 client on s3a://osm-pds: + com.amazonaws.SdkClientException: Unable to find a region via the region provider chain. + Must provide an explicit region in the builder or setup environment to supply a region.: + Unable to find a region via the region provider chain. + Must provide an explicit region in the builder or setup environment to supply a region. + at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:208) + at org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.createS3Client(DefaultS3ClientFactory.java:122) + at org.apache.hadoop.fs.s3a.S3AFileSystem.bindAWSClient(S3AFileSystem.java:788) + at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:478) + at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3460) + at org.apache.hadoop.fs.FileSystem.access$300(FileSystem.java:172) + at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3565) + at org.apache.hadoop.fs.FileSystem$Cache.getUnique(FileSystem.java:3518) + at org.apache.hadoop.fs.FileSystem.newInstance(FileSystem.java:592) +Caused by: com.amazonaws.SdkClientException: Unable to find a region via the region provider chain. + Must provide an explicit region in the builder or setup environment to supply a region. + at com.amazonaws.client.builder.AwsClientBuilder.setRegion(AwsClientBuilder.java:462) + at com.amazonaws.client.builder.AwsClientBuilder.configureMutableProperties(AwsClientBuilder.java:424) + at com.amazonaws.client.builder.AwsSyncClientBuilder.build(AwsSyncClientBuilder.java:46) + at org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.buildAmazonS3Client(DefaultS3ClientFactory.java:185) + at org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.createS3Client(DefaultS3ClientFactory.java:117) + ... 21 more +``` + +Due to changes in S3 client construction in Hadoop 3.3.1 this option surfaces in +non-EC2 deployments where no AWS endpoint was declared: +[HADOOP-17771](https://issues.apache.org/jira/browse/HADOOP-17771). On Hadoop +3.3.2 and later it takes active effort to create this stack trace. + +**Fix: set `fs.s3a.endpoint` to `s3.amazonaws.com`** + +Set `fs.s3a.endpoint` to the endpoint where the data is stored +(best), or to `s3.amazonaws.com` (second-best). + +```xml + + fs.s3a.endpoint + s3.amazonaws.com + +``` + +For Apache Spark, this can be done in `spark-defaults.conf` + +``` +spark.hadoop.fs.s3a.endpoint s3.amazonaws.com +``` + +Or in Scala by editing the spark configuration during setup. + +```scala +sc.hadoopConfiguration.set("fs.s3a.endpoint", "s3.amazonaws.com") +``` + +Tip: set the logging of `org.apache.hadoop.fs.s3a.DefaultS3ClientFactory` +to `DEBUG` to see how the endpoint and region configuration is determined. + +``` +log4j.logger.org.apache.hadoop.fs.s3a.DefaultS3ClientFactory=DEBUG +``` ### "Timeout waiting for connection from pool" when writing data @@ -792,257 +893,10 @@ Again, we believe this is caused by the connection to S3 being broken. It may go away if the operation is retried. -## Other Errors - -### `SdkClientException` Unable to verify integrity of data upload - -Something has happened to the data as it was uploaded. - -``` -Caused by: org.apache.hadoop.fs.s3a.AWSClientIOException: saving output on dest/_task_tmp.-ext-10000/_tmp.000000_0: - com.amazonaws.AmazonClientException: Unable to verify integrity of data upload. - Client calculated content hash (contentMD5: L75PalQk0CIhTp04MStVOA== in base 64) - didn't match hash (etag: 37ace01f2c383d6b9b3490933c83bb0f in hex) calculated by Amazon S3. - You may need to delete the data stored in Amazon S3. - (metadata.contentMD5: L75PalQk0CIhTp04MStVOA==, md5DigestStream: null, - bucketName: ext2, key: dest/_task_tmp.-ext-10000/_tmp.000000_0): - at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:144) - at org.apache.hadoop.fs.s3a.S3AOutputStream.close(S3AOutputStream.java:121) - at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72) - at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:106) - at org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat$1.close(HiveIgnoreKeyTextOutputFormat.java:99) - at org.apache.hadoop.hive.ql.exec.FileSinkOperator$FSPaths.closeWriters(FileSinkOperator.java:190) - ... 22 more -Caused by: com.amazonaws.AmazonClientException: Unable to verify integrity of data upload. - Client calculated content hash (contentMD5: L75PalQk0CIhTp04MStVOA== in base 64) - didn't match hash (etag: 37ace01f2c383d6b9b3490933c83bb0f in hex) calculated by Amazon S3. - You may need to delete the data stored in Amazon S3. - (metadata.contentMD5: L75PalQk0CIhTp04MStVOA==, md5DigestStream: null, - bucketName: ext2, key: dest/_task_tmp.-ext-10000/_tmp.000000_0) - at com.amazonaws.services.s3.AmazonS3Client.putObject(AmazonS3Client.java:1492) - at com.amazonaws.services.s3.transfer.internal.UploadCallable.uploadInOneChunk(UploadCallable.java:131) - at com.amazonaws.services.s3.transfer.internal.UploadCallable.call(UploadCallable.java:123) - at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:139) - at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:47) - ... 4 more -``` - -As it uploads data to S3, the AWS SDK builds up an MD5 checksum of what was -PUT/POSTed. When S3 returns the checksum of the uploaded data, that is compared -with the local checksum. If there is a mismatch, this error is reported. - -The uploaded data is already on S3 and will stay there, though if this happens -during a multipart upload, it may not be visible (but still billed: clean up your -multipart uploads via the `hadoop s3guard uploads` command). - -Possible causes for this - -1. A (possibly transient) network problem, including hardware faults. -1. A proxy server is doing bad things to the data. -1. Some signing problem, especially with third-party S3-compatible object stores. - -This is a very, very rare occurrence. - -If the problem is a signing one, try changing the signature algorithm. - -```xml - - fs.s3a.signing-algorithm - S3SignerType - -``` - -We cannot make any promises that it will work, -only that it has been known to make the problem go away "once" - -### `AWSS3IOException` The Content-MD5 you specified did not match what we received - -Reads work, but writes, even `mkdir`, fail: - -``` -org.apache.hadoop.fs.s3a.AWSS3IOException: copyFromLocalFile(file:/tmp/hello.txt, s3a://bucket/hello.txt) - on file:/tmp/hello.txt: - The Content-MD5 you specified did not match what we received. - (Service: Amazon S3; Status Code: 400; Error Code: BadDigest; Request ID: 4018131225), - S3 Extended Request ID: null - at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:127) - at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:69) - at org.apache.hadoop.fs.s3a.S3AFileSystem.copyFromLocalFile(S3AFileSystem.java:1494) - at org.apache.hadoop.tools.cloudup.Cloudup.uploadOneFile(Cloudup.java:466) - at org.apache.hadoop.tools.cloudup.Cloudup.access$000(Cloudup.java:63) - at org.apache.hadoop.tools.cloudup.Cloudup$1.call(Cloudup.java:353) - at org.apache.hadoop.tools.cloudup.Cloudup$1.call(Cloudup.java:350) - at java.util.concurrent.FutureTask.run(FutureTask.java:266) - at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) - at java.util.concurrent.FutureTask.run(FutureTask.java:266) - at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) - at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) - at java.lang.Thread.run(Thread.java:748) -Caused by: com.amazonaws.services.s3.model.AmazonS3Exception: - The Content-MD5 you specified did not match what we received. - (Service: Amazon S3; Status Code: 400; Error Code: BadDigest; Request ID: 4018131225), - S3 Extended Request ID: null - at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1307) - at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:894) - at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:597) - at com.amazonaws.http.AmazonHttpClient.doExecute(AmazonHttpClient.java:363) - at com.amazonaws.http.AmazonHttpClient.executeWithTimer(AmazonHttpClient.java:329) - at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:308) - at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3659) - at com.amazonaws.services.s3.AmazonS3Client.putObject(AmazonS3Client.java:1422) - at com.amazonaws.services.s3.transfer.internal.UploadCallable.uploadInOneChunk(UploadCallable.java:131) - at com.amazonaws.services.s3.transfer.internal.UploadCallable.call(UploadCallable.java:123) - at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:139) - at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:47) - at org.apache.hadoop.fs.s3a.BlockingThreadPoolExecutorService$CallableWithPermitRelease.call(BlockingThreadPoolExecutorService.java:239) - ... 4 more -``` - -This stack trace was seen when interacting with a third-party S3 store whose -expectations of headers related to the AWS V4 signing mechanism was not -compatible with that of the specific AWS SDK Hadoop was using. - -Workaround: revert to V2 signing. - -```xml - - fs.s3a.signing-algorithm - S3SignerType - -``` - -### When writing data: "java.io.FileNotFoundException: Completing multi-part upload" - - -A multipart upload was trying to complete, but failed as there was no upload -with that ID. - -``` -java.io.FileNotFoundException: Completing multi-part upload on fork-5/test/multipart/1c397ca6-9dfb-4ac1-9cf7-db666673246b: - com.amazonaws.services.s3.model.AmazonS3Exception: The specified upload does not exist. - The upload ID may be invalid, or the upload may have been aborted or completed. - (Service: Amazon S3; Status Code: 404; Error Code: NoSuchUpload; - at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1182) - at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:770) - at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489) - at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310) - at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785) - at com.amazonaws.services.s3.AmazonS3Client.completeMultipartUpload(AmazonS3Client.java:2705) - at org.apache.hadoop.fs.s3a.S3ABlockOutputStream$MultiPartUpload.complete(S3ABlockOutputStream.java:473) - at org.apache.hadoop.fs.s3a.S3ABlockOutputStream$MultiPartUpload.access$200(S3ABlockOutputStream.java:382) - at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.close(S3ABlockOutputStream.java:272) - at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72) - at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:106) -``` - -This can happen when all outstanding uploads have been aborted, including -the active ones. - -If the bucket has a lifecycle policy of deleting multipart uploads, make -sure that the expiry time of the deletion is greater than that required -for all open writes to complete the write, -*and for all jobs using the S3A committers to commit their work.* - - -### Application hangs after reading a number of files - - -The pool of https client connections and/or IO threads have been used up, -and none are being freed. - - -1. The pools aren't big enough. See ["Timeout waiting for connection from pool"](#timeout_from_pool) -2. Likely root cause: whatever code is reading files isn't calling `close()` -on the input streams. Make sure your code does this! -And if it's someone else's: make sure you have a recent version; search their -issue trackers to see if its a known/fixed problem. -If not, it's time to work with the developers, or come up with a workaround -(i.e closing the input stream yourself). - - - -### Issue: when writing data, HTTP Exceptions logged at info from `AmazonHttpClient` - -``` -[s3a-transfer-shared-pool4-t6] INFO http.AmazonHttpClient (AmazonHttpClient.java:executeHelper(496)) - - Unable to execute HTTP request: hwdev-steve-ireland-new.s3.amazonaws.com:443 failed to respond -org.apache.http.NoHttpResponseException: bucket.s3.amazonaws.com:443 failed to respond - at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:143) - at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:57) - at org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:261) - at org.apache.http.impl.AbstractHttpClientConnection.receiveResponseHeader(AbstractHttpClientConnection.java:283) - at org.apache.http.impl.conn.DefaultClientConnection.receiveResponseHeader(DefaultClientConnection.java:259) - at org.apache.http.impl.conn.ManagedClientConnectionImpl.receiveResponseHeader(ManagedClientConnectionImpl.java:209) - at org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:272) - at com.amazonaws.http.protocol.SdkHttpRequestExecutor.doReceiveResponse(SdkHttpRequestExecutor.java:66) - at org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:124) - at org.apache.http.impl.client.DefaultRequestDirector.tryExecute(DefaultRequestDirector.java:686) - at org.apache.http.impl.client.DefaultRequestDirector.execute(DefaultRequestDirector.java:488) - at org.apache.http.impl.client.AbstractHttpClient.doExecute(AbstractHttpClient.java:884) - at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82) - at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55) - at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:728) - at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489) - at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310) - at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785) - at com.amazonaws.services.s3.AmazonS3Client.copyPart(AmazonS3Client.java:1731) - at com.amazonaws.services.s3.transfer.internal.CopyPartCallable.call(CopyPartCallable.java:41) - at com.amazonaws.services.s3.transfer.internal.CopyPartCallable.call(CopyPartCallable.java:28) - at org.apache.hadoop.fs.s3a.BlockingThreadPoolExecutorService$CallableWithPermitRelease.call(BlockingThreadPoolExecutorService.java:239) - at java.util.concurrent.FutureTask.run(FutureTask.java:266) - at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) - at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) - at java.lang.Thread.run(Thread.java:745) -``` - -These are HTTP I/O exceptions caught and logged inside the AWS SDK. The client -will attempt to retry the operation; it may just be a transient event. If there -are many such exceptions in logs, it may be a symptom of connectivity or network -problems. - -### `AWSBadRequestException` IllegalLocationConstraintException/The unspecified location constraint is incompatible - -``` - Cause: org.apache.hadoop.fs.s3a.AWSBadRequestException: put on : - com.amazonaws.services.s3.model.AmazonS3Exception: - The unspecified location constraint is incompatible for the region specific - endpoint this request was sent to. - (Service: Amazon S3; Status Code: 400; Error Code: IllegalLocationConstraintException; - - at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:178) - at org.apache.hadoop.fs.s3a.S3ALambda.execute(S3ALambda.java:64) - at org.apache.hadoop.fs.s3a.WriteOperationHelper.uploadObject(WriteOperationHelper.java:451) - at org.apache.hadoop.fs.s3a.commit.magic.MagicCommitTracker.aboutToComplete(MagicCommitTracker.java:128) - at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.close(S3ABlockOutputStream.java:373) - at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72) - at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:101) - at org.apache.hadoop.hive.ql.io.orc.WriterImpl.close(WriterImpl.java:2429) - at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$OrcRecordWriter.close(OrcOutputFormat.java:106) - at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$OrcRecordWriter.close(OrcOutputFormat.java:91) - ... - Cause: com.amazonaws.services.s3.model.AmazonS3Exception: - The unspecified location constraint is incompatible for the region specific endpoint - this request was sent to. (Service: Amazon S3; Status Code: 400; Error Code: IllegalLocationConstraintException; - Request ID: EEBC5A08BCB3A645) - at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1588) - at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1258) - at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1030) - at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:742) - at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:716) - at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:699) - at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:667) - at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:649) - at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:513) - at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4221) - ... -``` - -Something has been trying to write data to "/". - -## File System Semantics +## File System Semantics These are the issues where S3 does not appear to behave the way a filesystem -"should". +"should". That's because it "isn't". ### File not visible/saved @@ -1185,7 +1039,7 @@ We also recommend using applications/application options which do not rename files when committing work or when copying data to S3, but instead write directly to the final destination. -## Rename not behaving as "expected" +### Rename not behaving as "expected" S3 is not a filesystem. The S3A connector mimics file and directory rename by @@ -1303,7 +1157,7 @@ is used, no encryption is specified, or the SSE-C specified is incorrect. 2. A directory is encrypted with a SSE-C keyA and the user is trying to move a file using configured SSE-C keyB into that structure. -## Message appears in logs "Not all bytes were read from the S3ObjectInputStream" +### Message appears in logs "Not all bytes were read from the S3ObjectInputStream" This is a message which can be generated by the Amazon SDK when the client application @@ -1378,8 +1232,250 @@ The specified bucket does not exist at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1367) ``` +## Other Errors -## Other Issues +### `SdkClientException` Unable to verify integrity of data upload + +Something has happened to the data as it was uploaded. + +``` +Caused by: org.apache.hadoop.fs.s3a.AWSClientIOException: saving output on dest/_task_tmp.-ext-10000/_tmp.000000_0: + com.amazonaws.AmazonClientException: Unable to verify integrity of data upload. + Client calculated content hash (contentMD5: L75PalQk0CIhTp04MStVOA== in base 64) + didn't match hash (etag: 37ace01f2c383d6b9b3490933c83bb0f in hex) calculated by Amazon S3. + You may need to delete the data stored in Amazon S3. + (metadata.contentMD5: L75PalQk0CIhTp04MStVOA==, md5DigestStream: null, + bucketName: ext2, key: dest/_task_tmp.-ext-10000/_tmp.000000_0): + at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:144) + at org.apache.hadoop.fs.s3a.S3AOutputStream.close(S3AOutputStream.java:121) + at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72) + at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:106) + at org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat$1.close(HiveIgnoreKeyTextOutputFormat.java:99) + at org.apache.hadoop.hive.ql.exec.FileSinkOperator$FSPaths.closeWriters(FileSinkOperator.java:190) + ... 22 more +Caused by: com.amazonaws.AmazonClientException: Unable to verify integrity of data upload. + Client calculated content hash (contentMD5: L75PalQk0CIhTp04MStVOA== in base 64) + didn't match hash (etag: 37ace01f2c383d6b9b3490933c83bb0f in hex) calculated by Amazon S3. + You may need to delete the data stored in Amazon S3. + (metadata.contentMD5: L75PalQk0CIhTp04MStVOA==, md5DigestStream: null, + bucketName: ext2, key: dest/_task_tmp.-ext-10000/_tmp.000000_0) + at com.amazonaws.services.s3.AmazonS3Client.putObject(AmazonS3Client.java:1492) + at com.amazonaws.services.s3.transfer.internal.UploadCallable.uploadInOneChunk(UploadCallable.java:131) + at com.amazonaws.services.s3.transfer.internal.UploadCallable.call(UploadCallable.java:123) + at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:139) + at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:47) + ... 4 more +``` + +As it uploads data to S3, the AWS SDK builds up an MD5 checksum of what was +PUT/POSTed. When S3 returns the checksum of the uploaded data, that is compared +with the local checksum. If there is a mismatch, this error is reported. + +The uploaded data is already on S3 and will stay there, though if this happens +during a multipart upload, it may not be visible (but still billed: clean up +your multipart uploads via the `hadoop s3guard uploads` command). + +Possible causes for this + +1. A (possibly transient) network problem, including hardware faults. +1. A proxy server is doing bad things to the data. +1. Some signing problem, especially with third-party S3-compatible object + stores. + +This is a very, very rare occurrence. + +If the problem is a signing one, try changing the signature algorithm. + +```xml + + fs.s3a.signing-algorithm + S3SignerType + +``` + +We cannot make any promises that it will work, only that it has been known to +make the problem go away "once" + +### `AWSS3IOException` The Content-MD5 you specified did not match what we received + +Reads work, but writes, even `mkdir`, fail: + +``` +org.apache.hadoop.fs.s3a.AWSS3IOException: copyFromLocalFile(file:/tmp/hello.txt, s3a://bucket/hello.txt) + on file:/tmp/hello.txt: + The Content-MD5 you specified did not match what we received. + (Service: Amazon S3; Status Code: 400; Error Code: BadDigest; Request ID: 4018131225), + S3 Extended Request ID: null + at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:127) + at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:69) + at org.apache.hadoop.fs.s3a.S3AFileSystem.copyFromLocalFile(S3AFileSystem.java:1494) + at org.apache.hadoop.tools.cloudup.Cloudup.uploadOneFile(Cloudup.java:466) + at org.apache.hadoop.tools.cloudup.Cloudup.access$000(Cloudup.java:63) + at org.apache.hadoop.tools.cloudup.Cloudup$1.call(Cloudup.java:353) + at org.apache.hadoop.tools.cloudup.Cloudup$1.call(Cloudup.java:350) + at java.util.concurrent.FutureTask.run(FutureTask.java:266) + at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) + at java.util.concurrent.FutureTask.run(FutureTask.java:266) + at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) + at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) + at java.lang.Thread.run(Thread.java:748) +Caused by: com.amazonaws.services.s3.model.AmazonS3Exception: + The Content-MD5 you specified did not match what we received. + (Service: Amazon S3; Status Code: 400; Error Code: BadDigest; Request ID: 4018131225), + S3 Extended Request ID: null + at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1307) + at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:894) + at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:597) + at com.amazonaws.http.AmazonHttpClient.doExecute(AmazonHttpClient.java:363) + at com.amazonaws.http.AmazonHttpClient.executeWithTimer(AmazonHttpClient.java:329) + at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:308) + at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3659) + at com.amazonaws.services.s3.AmazonS3Client.putObject(AmazonS3Client.java:1422) + at com.amazonaws.services.s3.transfer.internal.UploadCallable.uploadInOneChunk(UploadCallable.java:131) + at com.amazonaws.services.s3.transfer.internal.UploadCallable.call(UploadCallable.java:123) + at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:139) + at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:47) + at org.apache.hadoop.fs.s3a.BlockingThreadPoolExecutorService$CallableWithPermitRelease.call(BlockingThreadPoolExecutorService.java:239) + ... 4 more +``` + +This stack trace was seen when interacting with a third-party S3 store whose +expectations of headers related to the AWS V4 signing mechanism was not +compatible with that of the specific AWS SDK Hadoop was using. + +Workaround: revert to V2 signing. + +```xml + + fs.s3a.signing-algorithm + S3SignerType + +``` + +### When writing data: "java.io.FileNotFoundException: Completing multi-part upload" + +A multipart upload was trying to complete, but failed as there was no upload +with that ID. + +``` +java.io.FileNotFoundException: Completing multi-part upload on fork-5/test/multipart/1c397ca6-9dfb-4ac1-9cf7-db666673246b: + com.amazonaws.services.s3.model.AmazonS3Exception: The specified upload does not exist. + The upload ID may be invalid, or the upload may have been aborted or completed. + (Service: Amazon S3; Status Code: 404; Error Code: NoSuchUpload; + at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1182) + at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:770) + at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489) + at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310) + at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785) + at com.amazonaws.services.s3.AmazonS3Client.completeMultipartUpload(AmazonS3Client.java:2705) + at org.apache.hadoop.fs.s3a.S3ABlockOutputStream$MultiPartUpload.complete(S3ABlockOutputStream.java:473) + at org.apache.hadoop.fs.s3a.S3ABlockOutputStream$MultiPartUpload.access$200(S3ABlockOutputStream.java:382) + at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.close(S3ABlockOutputStream.java:272) + at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72) + at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:106) +``` + +This can happen when all outstanding uploads have been aborted, including the +active ones. + +If the bucket has a lifecycle policy of deleting multipart uploads, make sure +that the expiry time of the deletion is greater than that required for all open +writes to complete the write, +*and for all jobs using the S3A committers to commit their work.* + +### Application hangs after reading a number of files + +The pool of https client connections and/or IO threads have been used up, and +none are being freed. + +1. The pools aren't big enough. + See ["Timeout waiting for connection from pool"](#timeout_from_pool) +2. Likely root cause: whatever code is reading files isn't calling `close()` + on the input streams. Make sure your code does this! + And if it's someone else's: make sure you have a recent version; search their + issue trackers to see if its a known/fixed problem. If not, it's time to work + with the developers, or come up with a workaround + (i.e closing the input stream yourself). + +### Issue: when writing data, HTTP Exceptions logged at info from `AmazonHttpClient` + +``` +[s3a-transfer-shared-pool4-t6] INFO http.AmazonHttpClient (AmazonHttpClient.java:executeHelper(496)) + - Unable to execute HTTP request: hwdev-steve-ireland-new.s3.amazonaws.com:443 failed to respond +org.apache.http.NoHttpResponseException: bucket.s3.amazonaws.com:443 failed to respond + at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:143) + at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:57) + at org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:261) + at org.apache.http.impl.AbstractHttpClientConnection.receiveResponseHeader(AbstractHttpClientConnection.java:283) + at org.apache.http.impl.conn.DefaultClientConnection.receiveResponseHeader(DefaultClientConnection.java:259) + at org.apache.http.impl.conn.ManagedClientConnectionImpl.receiveResponseHeader(ManagedClientConnectionImpl.java:209) + at org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:272) + at com.amazonaws.http.protocol.SdkHttpRequestExecutor.doReceiveResponse(SdkHttpRequestExecutor.java:66) + at org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:124) + at org.apache.http.impl.client.DefaultRequestDirector.tryExecute(DefaultRequestDirector.java:686) + at org.apache.http.impl.client.DefaultRequestDirector.execute(DefaultRequestDirector.java:488) + at org.apache.http.impl.client.AbstractHttpClient.doExecute(AbstractHttpClient.java:884) + at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82) + at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55) + at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:728) + at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489) + at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310) + at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785) + at com.amazonaws.services.s3.AmazonS3Client.copyPart(AmazonS3Client.java:1731) + at com.amazonaws.services.s3.transfer.internal.CopyPartCallable.call(CopyPartCallable.java:41) + at com.amazonaws.services.s3.transfer.internal.CopyPartCallable.call(CopyPartCallable.java:28) + at org.apache.hadoop.fs.s3a.BlockingThreadPoolExecutorService$CallableWithPermitRelease.call(BlockingThreadPoolExecutorService.java:239) + at java.util.concurrent.FutureTask.run(FutureTask.java:266) + at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) + at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) + at java.lang.Thread.run(Thread.java:745) +``` + +These are HTTP I/O exceptions caught and logged inside the AWS SDK. The client +will attempt to retry the operation; it may just be a transient event. If there +are many such exceptions in logs, it may be a symptom of connectivity or network +problems. + +### `AWSBadRequestException` IllegalLocationConstraintException/The unspecified location constraint is incompatible + +``` + Cause: org.apache.hadoop.fs.s3a.AWSBadRequestException: put on : + com.amazonaws.services.s3.model.AmazonS3Exception: + The unspecified location constraint is incompatible for the region specific + endpoint this request was sent to. + (Service: Amazon S3; Status Code: 400; Error Code: IllegalLocationConstraintException; + + at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:178) + at org.apache.hadoop.fs.s3a.S3ALambda.execute(S3ALambda.java:64) + at org.apache.hadoop.fs.s3a.WriteOperationHelper.uploadObject(WriteOperationHelper.java:451) + at org.apache.hadoop.fs.s3a.commit.magic.MagicCommitTracker.aboutToComplete(MagicCommitTracker.java:128) + at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.close(S3ABlockOutputStream.java:373) + at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72) + at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:101) + at org.apache.hadoop.hive.ql.io.orc.WriterImpl.close(WriterImpl.java:2429) + at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$OrcRecordWriter.close(OrcOutputFormat.java:106) + at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$OrcRecordWriter.close(OrcOutputFormat.java:91) + ... + Cause: com.amazonaws.services.s3.model.AmazonS3Exception: + The unspecified location constraint is incompatible for the region specific endpoint + this request was sent to. (Service: Amazon S3; Status Code: 400; Error Code: IllegalLocationConstraintException; + Request ID: EEBC5A08BCB3A645) + at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1588) + at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1258) + at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1030) + at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:742) + at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:716) + at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:699) + at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:667) + at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:649) + at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:513) + at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4221) + ... +``` + +Something has been trying to write data to "/". + +## Best Practises ### Enabling low-level logging @@ -1444,10 +1540,20 @@ http.headers (LoggingManagedHttpClientConnection.java:onResponseReceived(127)) - http.headers (LoggingManagedHttpClientConnection.java:onResponseReceived(127)) - http-outgoing-0 << Content-Length: 0 http.headers (LoggingManagedHttpClientConnection.java:onResponseReceived(127)) - http-outgoing-0 << Server: AmazonS3 execchain.MainClientExec (MainClientExec.java:execute(284)) - Connection can be kept alive for 60000 MILLISECONDS + ``` +### Enable S3 Server-side Logging -## Reducing failures by configuring retry policy +The [Auditing](auditing) feature of the S3A connector can be used to generate +S3 Server Logs with information which can be used to debug problems +working with S3, such as throttling events. + +Consult the [auditing documentation](auditing) documentation. +As auditing is enabled by default, enabling S3 Logging for a bucket +should be sufficient to collect these logs. + +### Reducing failures by configuring retry policy The S3A client can ba configured to retry those operations which are considered retryable. That can be because they are idempotent, or diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index abd637a524..761dd55806 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -18,13 +18,25 @@ package org.apache.hadoop.fs.s3a; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + import com.amazonaws.ClientConfiguration; import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.util.AwsHostNameUtils; import org.assertj.core.api.Assertions; import org.junit.Test; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext; + import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; +import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CENTRAL_REGION; +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ENDPOINT; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.AWS_REGION_SYSPROP; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** * Test to check correctness of S3A endpoint regions in @@ -36,6 +48,7 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase { private static final String AWS_ENDPOINT_TEST = "test-endpoint"; private static final String AWS_ENDPOINT_TEST_WITH_REGION = "test-endpoint.some-region.amazonaws.com"; + public static final String MARS_NORTH_2 = "mars-north-2"; /** * Test to verify that setting a region with the config would bypass the @@ -88,4 +101,90 @@ private AwsClientBuilder.EndpointConfiguration createEpr(String endpoint, return DefaultS3ClientFactory.createEndpointConfiguration(endpoint, new ClientConfiguration(), awsRegion); } + + + @Test + public void testInvalidRegionDefaultEndpoint() throws Throwable { + describe("Create a client with an invalid region and the default endpoint"); + Configuration conf = getConfiguration(); + // we are making a big assumption about the timetable for AWS + // region rollout. + // if this test ever fails because this region now exists + // -congratulations! + conf.set(AWS_REGION, MARS_NORTH_2); + createMarsNorth2Client(conf); + } + + @Test + public void testUnsetRegionDefaultEndpoint() throws Throwable { + describe("Create a client with no region and the default endpoint"); + Configuration conf = getConfiguration(); + conf.unset(AWS_REGION); + createS3Client(conf, DEFAULT_ENDPOINT, AWS_S3_CENTRAL_REGION); + } + + /** + * By setting the system property {@code "aws.region"} we can + * guarantee that the SDK region resolution chain will always succeed + * (and fast). + * Clearly there is no validation of the region during the build process. + */ + @Test + public void testBlankRegionTriggersSDKResolution() throws Throwable { + describe("Create a client with a blank region and the default endpoint." + + " This will trigger the SDK Resolution chain"); + Configuration conf = getConfiguration(); + conf.set(AWS_REGION, ""); + System.setProperty(AWS_REGION_SYSPROP, MARS_NORTH_2); + try { + createMarsNorth2Client(conf); + } finally { + System.clearProperty(AWS_REGION_SYSPROP); + } + } + + /** + * Create an S3 client bonded to an invalid region; + * verify that calling {@code getRegion()} triggers + * a failure. + * @param conf configuration to use in the building. + */ + private void createMarsNorth2Client(Configuration conf) throws Exception { + AmazonS3 client = createS3Client(conf, DEFAULT_ENDPOINT, MARS_NORTH_2); + intercept(IllegalArgumentException.class, MARS_NORTH_2, client::getRegion); + } + + /** + * Create an S3 client with the given conf and endpoint. + * The region name must then match that of the expected + * value. + * @param conf configuration to use. + * @param endpoint endpoint. + * @param expectedRegion expected region + * @return the client. + * @throws URISyntaxException parse problems. + * @throws IOException IO problems + */ + private AmazonS3 createS3Client(Configuration conf, + String endpoint, + String expectedRegion) + throws URISyntaxException, IOException { + + DefaultS3ClientFactory factory + = new DefaultS3ClientFactory(); + factory.setConf(conf); + S3ClientFactory.S3ClientCreationParameters parameters + = new S3ClientFactory.S3ClientCreationParameters() + .withCredentialSet(new AnonymousAWSCredentialsProvider()) + .withEndpoint(endpoint) + .withMetrics(new EmptyS3AStatisticsContext() + .newStatisticsFromAwsSdk()); + AmazonS3 client = factory.createS3Client( + new URI("s3a://localhost/"), + parameters); + Assertions.assertThat(client.getRegionName()) + .describedAs("Client region name") + .isEqualTo(expectedRegion); + return client; + } }