HADOOP-17771. S3AFS creation fails "Unable to find a region via the region provider chain." (#3133)

This addresses the regression in Hadoop 3.3.1 where if no S3 endpoint
is set in fs.s3a.endpoint, S3A filesystem creation may fail on
non-EC2 deployments, depending on the local host environment setup.

* If fs.s3a.endpoint is empty/null, and fs.s3a.endpoint.region
  is null, the region is set to "us-east-1".
* If fs.s3a.endpoint.region is explicitly set to "" then the client
  falls back to the SDK region resolution chain; this works on EC2
* Details in troubleshooting.md, including a workaround for Hadoop-3.3.1+
* Also contains some minor restructuring of troubleshooting.md

Contributed by Steve Loughran.
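As a minimal sketch of the new fallback behaviour (not the actual factory code; the constant names match those introduced in `Constants.java` below, and the real logic lives in `DefaultS3ClientFactory.buildAmazonS3Client()`):

```java
import org.apache.hadoop.conf.Configuration;

// Simplified sketch of the HADOOP-17771 region fallback; illustration only.
public class RegionFallbackSketch {
  static final String AWS_REGION = "fs.s3a.endpoint.region";
  static final String AWS_S3_CENTRAL_REGION = "us-east-1";

  /**
   * @return the region to force on the client builder, or null to leave
   * region selection to the SDK resolution chain (EC2 metadata, env vars...).
   */
  static String chooseRegion(Configuration conf) {
    // an unset property falls back to the central region...
    String region = conf.getTrimmed(AWS_REGION, AWS_S3_CENTRAL_REGION);
    // ...while an explicitly empty value opts into the SDK chain
    return region.isEmpty() ? null : region;
  }
}
```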
Steve Loughran, 2021-06-24 16:37:27 +01:00, committed by GitHub
commit 5b7f68ac76 (parent 581f43dce1)
6 changed files with 526 additions and 264 deletions

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java

@@ -1087,4 +1087,10 @@ private Constants() {
*/
public static final String AWS_REGION = "fs.s3a.endpoint.region";
/**
* The special S3 region which can be used to talk to any bucket.
* Value {@value}.
*/
public static final String AWS_S3_CENTRAL_REGION = "us-east-1";
}

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java

@@ -22,6 +22,7 @@
import java.net.URI;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.SdkClientException;
import com.amazonaws.client.builder.AwsClientBuilder;
import com.amazonaws.handlers.RequestHandler2;
import com.amazonaws.services.s3.AmazonS3;
@@ -41,10 +42,13 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.s3a.statistics.impl.AwsStatisticsCollector;
import org.apache.hadoop.fs.store.LogExactlyOnce;
import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CENTRAL_REGION;
import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING;
import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING_DEFAULT;
import static org.apache.hadoop.fs.s3a.S3AUtils.translateException;
/**
* The default {@link S3ClientFactory} implementation.
@@ -64,6 +68,19 @@ public class DefaultS3ClientFactory extends Configured
protected static final Logger LOG =
LoggerFactory.getLogger(DefaultS3ClientFactory.class);
/**
* A one-off warning of default region chains in use.
*/
private static final LogExactlyOnce WARN_OF_DEFAULT_REGION_CHAIN =
new LogExactlyOnce(LOG);
/**
* Warning message printed when the SDK Region chain is in use.
*/
private static final String SDK_REGION_CHAIN_IN_USE =
"S3A filesystem client is using"
+ " the SDK region resolution chain.";
/**
* Create the client by preparing the AwsConf configuration
* and then invoking {@code buildAmazonS3Client()}.
@@ -94,9 +111,14 @@ public AmazonS3 createS3Client(
awsConf.setUserAgentSuffix(parameters.getUserAgentSuffix());
}
try {
return buildAmazonS3Client(
awsConf,
parameters);
} catch (SdkClientException e) {
// SDK refused to build.
throw translateException("creating AWS S3 client", uri.toString(), e);
}
}
/**
@@ -109,6 +131,7 @@ public AmazonS3 createS3Client(
* @param awsConf AWS configuration
* @param parameters parameters
* @return new AmazonS3 client
* @throws SdkClientException if the configuration is invalid.
*/
protected AmazonS3 buildAmazonS3Client(
final ClientConfiguration awsConf,
@@ -141,6 +164,21 @@ protected AmazonS3 buildAmazonS3Client(
// no idea what the endpoint is, so tell the SDK
// to work it out at the cost of an extra HEAD request
b.withForceGlobalBucketAccessEnabled(true);
// HADOOP-17771 force set the region so the build process doesn't halt.
String region = getConf().getTrimmed(AWS_REGION, AWS_S3_CENTRAL_REGION);
LOG.debug("fs.s3a.endpoint.region=\"{}\"", region);
if (!region.isEmpty()) {
// there's either an explicit region or we have fallen back
// to the central one.
LOG.debug("Using default endpoint; setting region to {}", region);
b.setRegion(region);
} else {
// no region.
// allow this if people really want it; it is OK to rely on this
// when deployed in EC2.
WARN_OF_DEFAULT_REGION_CHAIN.warn(SDK_REGION_CHAIN_IN_USE);
LOG.debug(SDK_REGION_CHAIN_IN_USE);
}
}
final AmazonS3 client = b.build();
return client;
@@ -206,7 +244,7 @@ protected static AmazonS3 configureAmazonS3Client(AmazonS3 s3,
createEndpointConfiguration(
final String endpoint, final ClientConfiguration awsConf,
String awsRegion) {
LOG.debug("Creating endpoint configuration for \"{}\"", endpoint);
if (endpoint == null || endpoint.isEmpty()) {
// the default endpoint...we should be using null at this point.
LOG.debug("Using default endpoint -no need to generate a configuration");

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java

@@ -121,4 +121,9 @@ private InternalConstants() {
*/
public static final int DEFAULT_UPLOAD_PART_COUNT_LIMIT = 10000;
/**
* The system property used by the AWS SDK to identify the region.
*/
public static final String AWS_REGION_SYSPROP = "aws.region";
}
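A hedged sketch of how that system property can be used to make the SDK chain resolve a region off-EC2 (the test below does exactly this; the region value here is an arbitrary placeholder):

```java
// Illustration only: force the SDK region resolution chain to succeed
// by setting the "aws.region" system property before creating the client.
public class RegionSyspropSketch {
  public static void main(String[] args) {
    System.setProperty("aws.region", "eu-west-1"); // placeholder value
    try {
      // ... create the S3A filesystem / S3 client here ...
    } finally {
      System.clearProperty("aws.region");
    }
  }
}
```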

hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md

@@ -438,6 +438,12 @@ you'll need to remove the `profile` prefix from the AWS configuration section he
aws_session_token = ...
aws_security_token = ...
```
Note:
1. The `region` setting is only used if `fs.s3a.endpoint.region` is set to the empty string.
1. For the credentials to be available to applications running in a Hadoop cluster, the
configuration files MUST be in the `~/.aws/` directory on the local filesystem in
all hosts in the cluster.
### <a name="auth_session"></a> Using Session Credentials with `TemporaryAWSCredentialsProvider` ### <a name="auth_session"></a> Using Session Credentials with `TemporaryAWSCredentialsProvider`
@@ -802,8 +808,10 @@ options are covered in [Testing](./testing.md).
<property>
<name>fs.s3a.endpoint.region</name>
<description>AWS S3 region for a bucket, which bypasses the parsing of
fs.s3a.endpoint to know the region. Would be helpful in avoiding errors
while using privateLink URL and explicitly set the bucket region.
If set to a blank string (or 1+ space), falls back to the
(potentially brittle) SDK region resolution process.
</description>
</property>
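A hedged Java sketch of the same options set programmatically; the bucket name is a placeholder, and the per-bucket form assumes the usual `fs.s3a.bucket.<bucket>.*` override pattern:

```java
import org.apache.hadoop.conf.Configuration;

// Illustration only: three ways of steering region selection.
public class EndpointRegionOptions {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // 1. Explicit region: bypasses endpoint parsing entirely.
    conf.set("fs.s3a.endpoint.region", "eu-west-2");

    // 2. Per-bucket override for a single (placeholder) bucket.
    conf.set("fs.s3a.bucket.example-bucket.endpoint.region", "us-east-1");

    // 3. A blank value falls back to the (potentially brittle) SDK chain.
    // conf.set("fs.s3a.endpoint.region", "");
  }
}
```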

hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md

@@ -18,11 +18,17 @@
## <a name="introduction"></a> Introduction
Common problems working with S3 are:
1. [Classpath setup](#classpath)
1. [Authentication](#authentication)
1. [Access Denial](#access_denied)
1. [Connectivity Problems](#connectivity)
1. [File System Semantics](#semantics)
1. [Encryption](#encryption)
1. [Other Errors](#other)
This document also includes some [best practises](#best) to aid troubleshooting.
Troubleshooting IAM Assumed Roles is covered in its
@@ -572,7 +578,7 @@ S3 sts endpoint and region like the following:
## <a name="connectivity"></a> Connectivity Problems
### <a name="bad_endpoint"></a> Error "The bucket you are attempting to access must be addressed using the specified endpoint"
This surfaces when `fs.s3a.endpoint` is configured to use an S3 service endpoint
which is neither the original AWS one, `s3.amazonaws.com` , nor the one where
@@ -611,6 +617,101 @@ can be used:
Using the explicit endpoint for the region is recommended for speed and
to use the V4 signing API.
### <a name="NoRegion"></a> `Unable to find a region via the region provider chain`
S3A client creation fails, possibly after a pause of some seconds.
This failure surfaces when _all_ the following conditions are met:
1. Deployment outside EC2.
1. `fs.s3a.endpoint` is unset.
1. `fs.s3a.endpoint.region` is set to `""`. (Hadoop 3.3.2+ only)
1. Without the file `~/.aws/config` existing or without a region set in it.
1. Without the JVM system property `aws.region` declaring a region.
1. Without the environment variable `AWS_REGION` declaring a region.
Stack trace (Hadoop 3.3.1):
```
Caused by: com.amazonaws.SdkClientException: Unable to find a region via the region provider chain.
Must provide an explicit region in the builder or setup environment to supply a region.
at com.amazonaws.client.builder.AwsClientBuilder.setRegion(AwsClientBuilder.java:462)
at com.amazonaws.client.builder.AwsClientBuilder.configureMutableProperties(AwsClientBuilder.java:424)
at com.amazonaws.client.builder.AwsSyncClientBuilder.build(AwsSyncClientBuilder.java:46)
at org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.buildAmazonS3Client(DefaultS3ClientFactory.java:145)
at org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.createS3Client(DefaultS3ClientFactory.java:97)
at org.apache.hadoop.fs.s3a.S3AFileSystem.bindAWSClient(S3AFileSystem.java:788)
at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:478)
```
Log and stack trace on later releases, where the warning
"S3A filesystem client is using the SDK region resolution chain."
indicates that the SDK resolution chain is in use:
```
2021-06-23 19:56:55,971 [main] WARN s3a.DefaultS3ClientFactory (LogExactlyOnce.java:warn(39)) -
S3A filesystem client is using the SDK region resolution chain.
2021-06-23 19:56:56,073 [main] WARN fs.FileSystem (FileSystem.java:createFileSystem(3464)) -
Failed to initialize fileystem s3a://osm-pds/planet:
org.apache.hadoop.fs.s3a.AWSClientIOException: creating AWS S3 client on s3a://osm-pds:
com.amazonaws.SdkClientException: Unable to find a region via the region provider chain.
Must provide an explicit region in the builder or setup environment to supply a region.:
Unable to find a region via the region provider chain.
Must provide an explicit region in the builder or setup environment to supply a region.
at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:208)
at org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.createS3Client(DefaultS3ClientFactory.java:122)
at org.apache.hadoop.fs.s3a.S3AFileSystem.bindAWSClient(S3AFileSystem.java:788)
at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:478)
at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3460)
at org.apache.hadoop.fs.FileSystem.access$300(FileSystem.java:172)
at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3565)
at org.apache.hadoop.fs.FileSystem$Cache.getUnique(FileSystem.java:3518)
at org.apache.hadoop.fs.FileSystem.newInstance(FileSystem.java:592)
Caused by: com.amazonaws.SdkClientException: Unable to find a region via the region provider chain.
Must provide an explicit region in the builder or setup environment to supply a region.
at com.amazonaws.client.builder.AwsClientBuilder.setRegion(AwsClientBuilder.java:462)
at com.amazonaws.client.builder.AwsClientBuilder.configureMutableProperties(AwsClientBuilder.java:424)
at com.amazonaws.client.builder.AwsSyncClientBuilder.build(AwsSyncClientBuilder.java:46)
at org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.buildAmazonS3Client(DefaultS3ClientFactory.java:185)
at org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.createS3Client(DefaultS3ClientFactory.java:117)
... 21 more
```
Due to changes in S3 client construction in Hadoop 3.3.1, this failure surfaces
in non-EC2 deployments where no AWS endpoint was declared:
[HADOOP-17771](https://issues.apache.org/jira/browse/HADOOP-17771). On Hadoop
3.3.2 and later it takes active effort to create this stack trace.
**Fix: set `fs.s3a.endpoint` to `s3.amazonaws.com`**
Set `fs.s3a.endpoint` to the endpoint where the data is stored
(best), or to `s3.amazonaws.com` (second-best).
```xml
<property>
<name>fs.s3a.endpoint</name>
<value>s3.amazonaws.com</value>
</property>
```
For Apache Spark, this can be done in `spark-defaults.conf`
```
spark.hadoop.fs.s3a.endpoint s3.amazonaws.com
```
Or in Scala by editing the spark configuration during setup.
```scala
sc.hadoopConfiguration.set("fs.s3a.endpoint", "s3.amazonaws.com")
```
Tip: set the logging of `org.apache.hadoop.fs.s3a.DefaultS3ClientFactory`
to `DEBUG` to see how the endpoint and region configuration is determined.
```
log4j.logger.org.apache.hadoop.fs.s3a.DefaultS3ClientFactory=DEBUG
```
### <a name="timeout_from_pool"></a> "Timeout waiting for connection from pool" when writing data ### <a name="timeout_from_pool"></a> "Timeout waiting for connection from pool" when writing data
@ -792,257 +893,10 @@ Again, we believe this is caused by the connection to S3 being broken.
It may go away if the operation is retried. It may go away if the operation is retried.
## <a name="other"></a> Other Errors ## <a name="semantics"></a>File System Semantics
### <a name="integrity"></a> `SdkClientException` Unable to verify integrity of data upload
Something has happened to the data as it was uploaded.
```
Caused by: org.apache.hadoop.fs.s3a.AWSClientIOException: saving output on dest/_task_tmp.-ext-10000/_tmp.000000_0:
com.amazonaws.AmazonClientException: Unable to verify integrity of data upload.
Client calculated content hash (contentMD5: L75PalQk0CIhTp04MStVOA== in base 64)
didn't match hash (etag: 37ace01f2c383d6b9b3490933c83bb0f in hex) calculated by Amazon S3.
You may need to delete the data stored in Amazon S3.
(metadata.contentMD5: L75PalQk0CIhTp04MStVOA==, md5DigestStream: null,
bucketName: ext2, key: dest/_task_tmp.-ext-10000/_tmp.000000_0):
at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:144)
at org.apache.hadoop.fs.s3a.S3AOutputStream.close(S3AOutputStream.java:121)
at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72)
at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:106)
at org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat$1.close(HiveIgnoreKeyTextOutputFormat.java:99)
at org.apache.hadoop.hive.ql.exec.FileSinkOperator$FSPaths.closeWriters(FileSinkOperator.java:190)
... 22 more
Caused by: com.amazonaws.AmazonClientException: Unable to verify integrity of data upload.
Client calculated content hash (contentMD5: L75PalQk0CIhTp04MStVOA== in base 64)
didn't match hash (etag: 37ace01f2c383d6b9b3490933c83bb0f in hex) calculated by Amazon S3.
You may need to delete the data stored in Amazon S3.
(metadata.contentMD5: L75PalQk0CIhTp04MStVOA==, md5DigestStream: null,
bucketName: ext2, key: dest/_task_tmp.-ext-10000/_tmp.000000_0)
at com.amazonaws.services.s3.AmazonS3Client.putObject(AmazonS3Client.java:1492)
at com.amazonaws.services.s3.transfer.internal.UploadCallable.uploadInOneChunk(UploadCallable.java:131)
at com.amazonaws.services.s3.transfer.internal.UploadCallable.call(UploadCallable.java:123)
at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:139)
at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:47)
... 4 more
```
As it uploads data to S3, the AWS SDK builds up an MD5 checksum of what was
PUT/POSTed. When S3 returns the checksum of the uploaded data, that is compared
with the local checksum. If there is a mismatch, this error is reported.
The uploaded data is already on S3 and will stay there, though if this happens
during a multipart upload, it may not be visible (but still billed: clean up your
multipart uploads via the `hadoop s3guard uploads` command).
Possible causes for this
1. A (possibly transient) network problem, including hardware faults.
1. A proxy server is doing bad things to the data.
1. Some signing problem, especially with third-party S3-compatible object stores.
This is a very, very rare occurrence.
If the problem is a signing one, try changing the signature algorithm.
```xml
<property>
<name>fs.s3a.signing-algorithm</name>
<value>S3SignerType</value>
</property>
```
We cannot make any promises that it will work,
only that it has been known to make the problem go away "once"
### `AWSS3IOException` The Content-MD5 you specified did not match what we received
Reads work, but writes, even `mkdir`, fail:
```
org.apache.hadoop.fs.s3a.AWSS3IOException: copyFromLocalFile(file:/tmp/hello.txt, s3a://bucket/hello.txt)
on file:/tmp/hello.txt:
The Content-MD5 you specified did not match what we received.
(Service: Amazon S3; Status Code: 400; Error Code: BadDigest; Request ID: 4018131225),
S3 Extended Request ID: null
at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:127)
at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:69)
at org.apache.hadoop.fs.s3a.S3AFileSystem.copyFromLocalFile(S3AFileSystem.java:1494)
at org.apache.hadoop.tools.cloudup.Cloudup.uploadOneFile(Cloudup.java:466)
at org.apache.hadoop.tools.cloudup.Cloudup.access$000(Cloudup.java:63)
at org.apache.hadoop.tools.cloudup.Cloudup$1.call(Cloudup.java:353)
at org.apache.hadoop.tools.cloudup.Cloudup$1.call(Cloudup.java:350)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
Caused by: com.amazonaws.services.s3.model.AmazonS3Exception:
The Content-MD5 you specified did not match what we received.
(Service: Amazon S3; Status Code: 400; Error Code: BadDigest; Request ID: 4018131225),
S3 Extended Request ID: null
at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1307)
at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:894)
at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:597)
at com.amazonaws.http.AmazonHttpClient.doExecute(AmazonHttpClient.java:363)
at com.amazonaws.http.AmazonHttpClient.executeWithTimer(AmazonHttpClient.java:329)
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:308)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3659)
at com.amazonaws.services.s3.AmazonS3Client.putObject(AmazonS3Client.java:1422)
at com.amazonaws.services.s3.transfer.internal.UploadCallable.uploadInOneChunk(UploadCallable.java:131)
at com.amazonaws.services.s3.transfer.internal.UploadCallable.call(UploadCallable.java:123)
at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:139)
at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:47)
at org.apache.hadoop.fs.s3a.BlockingThreadPoolExecutorService$CallableWithPermitRelease.call(BlockingThreadPoolExecutorService.java:239)
... 4 more
```
This stack trace was seen when interacting with a third-party S3 store whose
expectations of headers related to the AWS V4 signing mechanism was not
compatible with that of the specific AWS SDK Hadoop was using.
Workaround: revert to V2 signing.
```xml
<property>
<name>fs.s3a.signing-algorithm</name>
<value>S3SignerType</value>
</property>
```
### When writing data: "java.io.FileNotFoundException: Completing multi-part upload"
A multipart upload was trying to complete, but failed as there was no upload
with that ID.
```
java.io.FileNotFoundException: Completing multi-part upload on fork-5/test/multipart/1c397ca6-9dfb-4ac1-9cf7-db666673246b:
com.amazonaws.services.s3.model.AmazonS3Exception: The specified upload does not exist.
The upload ID may be invalid, or the upload may have been aborted or completed.
(Service: Amazon S3; Status Code: 404; Error Code: NoSuchUpload;
at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1182)
at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:770)
at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489)
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785)
at com.amazonaws.services.s3.AmazonS3Client.completeMultipartUpload(AmazonS3Client.java:2705)
at org.apache.hadoop.fs.s3a.S3ABlockOutputStream$MultiPartUpload.complete(S3ABlockOutputStream.java:473)
at org.apache.hadoop.fs.s3a.S3ABlockOutputStream$MultiPartUpload.access$200(S3ABlockOutputStream.java:382)
at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.close(S3ABlockOutputStream.java:272)
at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72)
at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:106)
```
This can happen when all outstanding uploads have been aborted, including
the active ones.
If the bucket has a lifecycle policy of deleting multipart uploads, make
sure that the expiry time of the deletion is greater than that required
for all open writes to complete the write,
*and for all jobs using the S3A committers to commit their work.*
### Application hangs after reading a number of files
The pool of https client connections and/or IO threads have been used up,
and none are being freed.
1. The pools aren't big enough. See ["Timeout waiting for connection from pool"](#timeout_from_pool)
2. Likely root cause: whatever code is reading files isn't calling `close()`
on the input streams. Make sure your code does this!
And if it's someone else's: make sure you have a recent version; search their
issue trackers to see if its a known/fixed problem.
If not, it's time to work with the developers, or come up with a workaround
(i.e closing the input stream yourself).
### Issue: when writing data, HTTP Exceptions logged at info from `AmazonHttpClient`
```
[s3a-transfer-shared-pool4-t6] INFO http.AmazonHttpClient (AmazonHttpClient.java:executeHelper(496))
- Unable to execute HTTP request: hwdev-steve-ireland-new.s3.amazonaws.com:443 failed to respond
org.apache.http.NoHttpResponseException: bucket.s3.amazonaws.com:443 failed to respond
at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:143)
at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:57)
at org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:261)
at org.apache.http.impl.AbstractHttpClientConnection.receiveResponseHeader(AbstractHttpClientConnection.java:283)
at org.apache.http.impl.conn.DefaultClientConnection.receiveResponseHeader(DefaultClientConnection.java:259)
at org.apache.http.impl.conn.ManagedClientConnectionImpl.receiveResponseHeader(ManagedClientConnectionImpl.java:209)
at org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:272)
at com.amazonaws.http.protocol.SdkHttpRequestExecutor.doReceiveResponse(SdkHttpRequestExecutor.java:66)
at org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:124)
at org.apache.http.impl.client.DefaultRequestDirector.tryExecute(DefaultRequestDirector.java:686)
at org.apache.http.impl.client.DefaultRequestDirector.execute(DefaultRequestDirector.java:488)
at org.apache.http.impl.client.AbstractHttpClient.doExecute(AbstractHttpClient.java:884)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55)
at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:728)
at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489)
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785)
at com.amazonaws.services.s3.AmazonS3Client.copyPart(AmazonS3Client.java:1731)
at com.amazonaws.services.s3.transfer.internal.CopyPartCallable.call(CopyPartCallable.java:41)
at com.amazonaws.services.s3.transfer.internal.CopyPartCallable.call(CopyPartCallable.java:28)
at org.apache.hadoop.fs.s3a.BlockingThreadPoolExecutorService$CallableWithPermitRelease.call(BlockingThreadPoolExecutorService.java:239)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
```
These are HTTP I/O exceptions caught and logged inside the AWS SDK. The client
will attempt to retry the operation; it may just be a transient event. If there
are many such exceptions in logs, it may be a symptom of connectivity or network
problems.
### `AWSBadRequestException` IllegalLocationConstraintException/The unspecified location constraint is incompatible
```
Cause: org.apache.hadoop.fs.s3a.AWSBadRequestException: put on :
com.amazonaws.services.s3.model.AmazonS3Exception:
The unspecified location constraint is incompatible for the region specific
endpoint this request was sent to.
(Service: Amazon S3; Status Code: 400; Error Code: IllegalLocationConstraintException;
at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:178)
at org.apache.hadoop.fs.s3a.S3ALambda.execute(S3ALambda.java:64)
at org.apache.hadoop.fs.s3a.WriteOperationHelper.uploadObject(WriteOperationHelper.java:451)
at org.apache.hadoop.fs.s3a.commit.magic.MagicCommitTracker.aboutToComplete(MagicCommitTracker.java:128)
at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.close(S3ABlockOutputStream.java:373)
at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72)
at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:101)
at org.apache.hadoop.hive.ql.io.orc.WriterImpl.close(WriterImpl.java:2429)
at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$OrcRecordWriter.close(OrcOutputFormat.java:106)
at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$OrcRecordWriter.close(OrcOutputFormat.java:91)
...
Cause: com.amazonaws.services.s3.model.AmazonS3Exception:
The unspecified location constraint is incompatible for the region specific endpoint
this request was sent to. (Service: Amazon S3; Status Code: 400; Error Code: IllegalLocationConstraintException;
Request ID: EEBC5A08BCB3A645)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1588)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1258)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1030)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:742)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:716)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:699)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:667)
at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:649)
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:513)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4221)
...
```
Something has been trying to write data to "/".
## File System Semantics
These are the issues where S3 does not appear to behave the way a filesystem
"should". That's because it "isn't".
### File not visible/saved
@@ -1185,7 +1039,7 @@ We also recommend using applications/application
options which do not rename files when committing work or when copying data
to S3, but instead write directly to the final destination.
### Rename not behaving as "expected"
S3 is not a filesystem. The S3A connector mimics file and directory rename by
@@ -1303,7 +1157,7 @@ is used, no encryption is specified, or the SSE-C specified is incorrect.
2. A directory is encrypted with a SSE-C keyA and the user is trying to move a
file using configured SSE-C keyB into that structure.
### <a name="not_all_bytes_were_read"></a> Message appears in logs "Not all bytes were read from the S3ObjectInputStream"
This is a message which can be generated by the Amazon SDK when the client application
@@ -1378,8 +1232,250 @@ The specified bucket does not exist
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1367)
```
## <a name="other"></a> Other Errors
### <a name="integrity"></a> `SdkClientException` Unable to verify integrity of data upload
Something has happened to the data as it was uploaded.
```
Caused by: org.apache.hadoop.fs.s3a.AWSClientIOException: saving output on dest/_task_tmp.-ext-10000/_tmp.000000_0:
com.amazonaws.AmazonClientException: Unable to verify integrity of data upload.
Client calculated content hash (contentMD5: L75PalQk0CIhTp04MStVOA== in base 64)
didn't match hash (etag: 37ace01f2c383d6b9b3490933c83bb0f in hex) calculated by Amazon S3.
You may need to delete the data stored in Amazon S3.
(metadata.contentMD5: L75PalQk0CIhTp04MStVOA==, md5DigestStream: null,
bucketName: ext2, key: dest/_task_tmp.-ext-10000/_tmp.000000_0):
at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:144)
at org.apache.hadoop.fs.s3a.S3AOutputStream.close(S3AOutputStream.java:121)
at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72)
at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:106)
at org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat$1.close(HiveIgnoreKeyTextOutputFormat.java:99)
at org.apache.hadoop.hive.ql.exec.FileSinkOperator$FSPaths.closeWriters(FileSinkOperator.java:190)
... 22 more
Caused by: com.amazonaws.AmazonClientException: Unable to verify integrity of data upload.
Client calculated content hash (contentMD5: L75PalQk0CIhTp04MStVOA== in base 64)
didn't match hash (etag: 37ace01f2c383d6b9b3490933c83bb0f in hex) calculated by Amazon S3.
You may need to delete the data stored in Amazon S3.
(metadata.contentMD5: L75PalQk0CIhTp04MStVOA==, md5DigestStream: null,
bucketName: ext2, key: dest/_task_tmp.-ext-10000/_tmp.000000_0)
at com.amazonaws.services.s3.AmazonS3Client.putObject(AmazonS3Client.java:1492)
at com.amazonaws.services.s3.transfer.internal.UploadCallable.uploadInOneChunk(UploadCallable.java:131)
at com.amazonaws.services.s3.transfer.internal.UploadCallable.call(UploadCallable.java:123)
at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:139)
at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:47)
... 4 more
```
As it uploads data to S3, the AWS SDK builds up an MD5 checksum of what was
PUT/POSTed. When S3 returns the checksum of the uploaded data, that is compared
with the local checksum. If there is a mismatch, this error is reported.
The uploaded data is already on S3 and will stay there, though if this happens
during a multipart upload, it may not be visible (but still billed: clean up
your multipart uploads via the `hadoop s3guard uploads` command).
Possible causes for this:
1. A (possibly transient) network problem, including hardware faults.
1. A proxy server is doing bad things to the data.
1. Some signing problem, especially with third-party S3-compatible object
stores.
This is a very, very rare occurrence.
If the problem is a signing one, try changing the signature algorithm.
```xml
<property>
<name>fs.s3a.signing-algorithm</name>
<value>S3SignerType</value>
</property>
```
We cannot make any promises that it will work, only that it has been known to
make the problem go away "once".
### `AWSS3IOException` The Content-MD5 you specified did not match what we received
Reads work, but writes, even `mkdir`, fail:
```
org.apache.hadoop.fs.s3a.AWSS3IOException: copyFromLocalFile(file:/tmp/hello.txt, s3a://bucket/hello.txt)
on file:/tmp/hello.txt:
The Content-MD5 you specified did not match what we received.
(Service: Amazon S3; Status Code: 400; Error Code: BadDigest; Request ID: 4018131225),
S3 Extended Request ID: null
at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:127)
at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:69)
at org.apache.hadoop.fs.s3a.S3AFileSystem.copyFromLocalFile(S3AFileSystem.java:1494)
at org.apache.hadoop.tools.cloudup.Cloudup.uploadOneFile(Cloudup.java:466)
at org.apache.hadoop.tools.cloudup.Cloudup.access$000(Cloudup.java:63)
at org.apache.hadoop.tools.cloudup.Cloudup$1.call(Cloudup.java:353)
at org.apache.hadoop.tools.cloudup.Cloudup$1.call(Cloudup.java:350)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:748)
Caused by: com.amazonaws.services.s3.model.AmazonS3Exception:
The Content-MD5 you specified did not match what we received.
(Service: Amazon S3; Status Code: 400; Error Code: BadDigest; Request ID: 4018131225),
S3 Extended Request ID: null
at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1307)
at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:894)
at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:597)
at com.amazonaws.http.AmazonHttpClient.doExecute(AmazonHttpClient.java:363)
at com.amazonaws.http.AmazonHttpClient.executeWithTimer(AmazonHttpClient.java:329)
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:308)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3659)
at com.amazonaws.services.s3.AmazonS3Client.putObject(AmazonS3Client.java:1422)
at com.amazonaws.services.s3.transfer.internal.UploadCallable.uploadInOneChunk(UploadCallable.java:131)
at com.amazonaws.services.s3.transfer.internal.UploadCallable.call(UploadCallable.java:123)
at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:139)
at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:47)
at org.apache.hadoop.fs.s3a.BlockingThreadPoolExecutorService$CallableWithPermitRelease.call(BlockingThreadPoolExecutorService.java:239)
... 4 more
```
This stack trace was seen when interacting with a third-party S3 store whose
expectations of headers related to the AWS V4 signing mechanism was not
compatible with that of the specific AWS SDK Hadoop was using.
Workaround: revert to V2 signing.
```xml
<property>
<name>fs.s3a.signing-algorithm</name>
<value>S3SignerType</value>
</property>
```
### When writing data: "java.io.FileNotFoundException: Completing multi-part upload"
A multipart upload was trying to complete, but failed as there was no upload
with that ID.
```
java.io.FileNotFoundException: Completing multi-part upload on fork-5/test/multipart/1c397ca6-9dfb-4ac1-9cf7-db666673246b:
com.amazonaws.services.s3.model.AmazonS3Exception: The specified upload does not exist.
The upload ID may be invalid, or the upload may have been aborted or completed.
(Service: Amazon S3; Status Code: 404; Error Code: NoSuchUpload;
at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1182)
at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:770)
at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489)
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785)
at com.amazonaws.services.s3.AmazonS3Client.completeMultipartUpload(AmazonS3Client.java:2705)
at org.apache.hadoop.fs.s3a.S3ABlockOutputStream$MultiPartUpload.complete(S3ABlockOutputStream.java:473)
at org.apache.hadoop.fs.s3a.S3ABlockOutputStream$MultiPartUpload.access$200(S3ABlockOutputStream.java:382)
at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.close(S3ABlockOutputStream.java:272)
at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72)
at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:106)
```
This can happen when all outstanding uploads have been aborted, including the
active ones.
If the bucket has a lifecycle policy of deleting multipart uploads, make sure
that the expiry time of the deletion is greater than that required for all open
writes to complete the write,
*and for all jobs using the S3A committers to commit their work.*
### Application hangs after reading a number of files
The pool of https client connections and/or IO threads have been used up, and
none are being freed.
1. The pools aren't big enough.
See ["Timeout waiting for connection from pool"](#timeout_from_pool)
2. Likely root cause: whatever code is reading files isn't calling `close()`
on the input streams. Make sure your code does this!
And if it's someone else's: make sure you have a recent version; search their
issue trackers to see if it's a known/fixed problem. If not, it's time to work
with the developers, or come up with a workaround
(i.e. closing the input stream yourself).
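A minimal sketch of the pattern which avoids the leak: try-with-resources guarantees `close()`, returning the connection to the pool even if a read fails (the path is a placeholder):

```java
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReadAndAlwaysClose {
  public static void main(String[] args) throws IOException {
    Path path = new Path("s3a://example-bucket/data/sample.csv"); // placeholder
    FileSystem fs = path.getFileSystem(new Configuration());
    long total = 0;
    // the stream is closed even if read() throws
    try (FSDataInputStream in = fs.open(path)) {
      byte[] buffer = new byte[8192];
      int read;
      while ((read = in.read(buffer)) != -1) {
        total += read;
      }
    }
    System.out.println("read " + total + " bytes");
  }
}
```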
### Issue: when writing data, HTTP Exceptions logged at info from `AmazonHttpClient`
```
[s3a-transfer-shared-pool4-t6] INFO http.AmazonHttpClient (AmazonHttpClient.java:executeHelper(496))
- Unable to execute HTTP request: hwdev-steve-ireland-new.s3.amazonaws.com:443 failed to respond
org.apache.http.NoHttpResponseException: bucket.s3.amazonaws.com:443 failed to respond
at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:143)
at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:57)
at org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:261)
at org.apache.http.impl.AbstractHttpClientConnection.receiveResponseHeader(AbstractHttpClientConnection.java:283)
at org.apache.http.impl.conn.DefaultClientConnection.receiveResponseHeader(DefaultClientConnection.java:259)
at org.apache.http.impl.conn.ManagedClientConnectionImpl.receiveResponseHeader(ManagedClientConnectionImpl.java:209)
at org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:272)
at com.amazonaws.http.protocol.SdkHttpRequestExecutor.doReceiveResponse(SdkHttpRequestExecutor.java:66)
at org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:124)
at org.apache.http.impl.client.DefaultRequestDirector.tryExecute(DefaultRequestDirector.java:686)
at org.apache.http.impl.client.DefaultRequestDirector.execute(DefaultRequestDirector.java:488)
at org.apache.http.impl.client.AbstractHttpClient.doExecute(AbstractHttpClient.java:884)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55)
at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:728)
at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489)
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785)
at com.amazonaws.services.s3.AmazonS3Client.copyPart(AmazonS3Client.java:1731)
at com.amazonaws.services.s3.transfer.internal.CopyPartCallable.call(CopyPartCallable.java:41)
at com.amazonaws.services.s3.transfer.internal.CopyPartCallable.call(CopyPartCallable.java:28)
at org.apache.hadoop.fs.s3a.BlockingThreadPoolExecutorService$CallableWithPermitRelease.call(BlockingThreadPoolExecutorService.java:239)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
```
These are HTTP I/O exceptions caught and logged inside the AWS SDK. The client
will attempt to retry the operation; it may just be a transient event. If there
are many such exceptions in logs, it may be a symptom of connectivity or network
problems.
### `AWSBadRequestException` IllegalLocationConstraintException/The unspecified location constraint is incompatible
```
Cause: org.apache.hadoop.fs.s3a.AWSBadRequestException: put on :
com.amazonaws.services.s3.model.AmazonS3Exception:
The unspecified location constraint is incompatible for the region specific
endpoint this request was sent to.
(Service: Amazon S3; Status Code: 400; Error Code: IllegalLocationConstraintException;
at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:178)
at org.apache.hadoop.fs.s3a.S3ALambda.execute(S3ALambda.java:64)
at org.apache.hadoop.fs.s3a.WriteOperationHelper.uploadObject(WriteOperationHelper.java:451)
at org.apache.hadoop.fs.s3a.commit.magic.MagicCommitTracker.aboutToComplete(MagicCommitTracker.java:128)
at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.close(S3ABlockOutputStream.java:373)
at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72)
at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:101)
at org.apache.hadoop.hive.ql.io.orc.WriterImpl.close(WriterImpl.java:2429)
at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$OrcRecordWriter.close(OrcOutputFormat.java:106)
at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$OrcRecordWriter.close(OrcOutputFormat.java:91)
...
Cause: com.amazonaws.services.s3.model.AmazonS3Exception:
The unspecified location constraint is incompatible for the region specific endpoint
this request was sent to. (Service: Amazon S3; Status Code: 400; Error Code: IllegalLocationConstraintException;
Request ID: EEBC5A08BCB3A645)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1588)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1258)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1030)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:742)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:716)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:699)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:667)
at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:649)
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:513)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4221)
...
```
Something has been trying to write data to "/".
## <a name="best"></a> Best Practises
### <a name="logging"></a> Enabling low-level logging ### <a name="logging"></a> Enabling low-level logging
@ -1444,10 +1540,20 @@ http.headers (LoggingManagedHttpClientConnection.java:onResponseReceived(127)) -
http.headers (LoggingManagedHttpClientConnection.java:onResponseReceived(127)) - http-outgoing-0 << Content-Length: 0 http.headers (LoggingManagedHttpClientConnection.java:onResponseReceived(127)) - http-outgoing-0 << Content-Length: 0
http.headers (LoggingManagedHttpClientConnection.java:onResponseReceived(127)) - http-outgoing-0 << Server: AmazonS3 http.headers (LoggingManagedHttpClientConnection.java:onResponseReceived(127)) - http-outgoing-0 << Server: AmazonS3
execchain.MainClientExec (MainClientExec.java:execute(284)) - Connection can be kept alive for 60000 MILLISECONDS execchain.MainClientExec (MainClientExec.java:execute(284)) - Connection can be kept alive for 60000 MILLISECONDS
``` ```
### <a name="audit-logging"></a> Enable S3 Server-side Logging
## <a name="retries"></a> Reducing failures by configuring retry policy The [Auditing](auditing) feature of the S3A connector can be used to generate
S3 Server Logs with information which can be used to debug problems
working with S3, such as throttling events.
Consult the [auditing](auditing) documentation.
As auditing is enabled by default, enabling S3 Logging for a bucket
should be sufficient to collect these logs.
### <a name="retries"></a> Reducing failures by configuring retry policy
The S3A client can be configured to retry those operations which are considered
retryable. That can be because they are idempotent, or

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java

@@ -18,13 +18,25 @@
package org.apache.hadoop.fs.s3a;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.client.builder.AwsClientBuilder;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.util.AwsHostNameUtils;
import org.assertj.core.api.Assertions;
import org.junit.Test;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext;
import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CENTRAL_REGION;
import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ENDPOINT;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.AWS_REGION_SYSPROP;
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
/**
* Test to check correctness of S3A endpoint regions in
@@ -36,6 +48,7 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase {
private static final String AWS_ENDPOINT_TEST = "test-endpoint";
private static final String AWS_ENDPOINT_TEST_WITH_REGION =
"test-endpoint.some-region.amazonaws.com";
public static final String MARS_NORTH_2 = "mars-north-2";
/**
* Test to verify that setting a region with the config would bypass the
@@ -88,4 +101,90 @@ private AwsClientBuilder.EndpointConfiguration createEpr(String endpoint,
return DefaultS3ClientFactory.createEndpointConfiguration(endpoint,
new ClientConfiguration(), awsRegion);
}
@Test
public void testInvalidRegionDefaultEndpoint() throws Throwable {
describe("Create a client with an invalid region and the default endpoint");
Configuration conf = getConfiguration();
// we are making a big assumption about the timetable for AWS
// region rollout.
// if this test ever fails because this region now exists
// -congratulations!
conf.set(AWS_REGION, MARS_NORTH_2);
createMarsNorth2Client(conf);
}
@Test
public void testUnsetRegionDefaultEndpoint() throws Throwable {
describe("Create a client with no region and the default endpoint");
Configuration conf = getConfiguration();
conf.unset(AWS_REGION);
createS3Client(conf, DEFAULT_ENDPOINT, AWS_S3_CENTRAL_REGION);
}
/**
* By setting the system property {@code "aws.region"} we can
* guarantee that the SDK region resolution chain will always succeed
* (and fast).
* Clearly there is no validation of the region during the build process.
*/
@Test
public void testBlankRegionTriggersSDKResolution() throws Throwable {
describe("Create a client with a blank region and the default endpoint."
+ " This will trigger the SDK Resolution chain");
Configuration conf = getConfiguration();
conf.set(AWS_REGION, "");
System.setProperty(AWS_REGION_SYSPROP, MARS_NORTH_2);
try {
createMarsNorth2Client(conf);
} finally {
System.clearProperty(AWS_REGION_SYSPROP);
}
}
/**
* Create an S3 client bonded to an invalid region;
* verify that calling {@code getRegion()} triggers
* a failure.
* @param conf configuration to use in the building.
*/
private void createMarsNorth2Client(Configuration conf) throws Exception {
AmazonS3 client = createS3Client(conf, DEFAULT_ENDPOINT, MARS_NORTH_2);
intercept(IllegalArgumentException.class, MARS_NORTH_2, client::getRegion);
}
/**
* Create an S3 client with the given conf and endpoint.
* The region name must then match that of the expected
* value.
* @param conf configuration to use.
* @param endpoint endpoint.
* @param expectedRegion expected region
* @return the client.
* @throws URISyntaxException parse problems.
* @throws IOException IO problems
*/
private AmazonS3 createS3Client(Configuration conf,
String endpoint,
String expectedRegion)
throws URISyntaxException, IOException {
DefaultS3ClientFactory factory
= new DefaultS3ClientFactory();
factory.setConf(conf);
S3ClientFactory.S3ClientCreationParameters parameters
= new S3ClientFactory.S3ClientCreationParameters()
.withCredentialSet(new AnonymousAWSCredentialsProvider())
.withEndpoint(endpoint)
.withMetrics(new EmptyS3AStatisticsContext()
.newStatisticsFromAwsSdk());
AmazonS3 client = factory.createS3Client(
new URI("s3a://localhost/"),
parameters);
Assertions.assertThat(client.getRegionName())
.describedAs("Client region name")
.isEqualTo(expectedRegion);
return client;
}
}