HADOOP-17771. S3AFS creation fails "Unable to find a region via the region provider chain." (#3133)
This addresses the regression in Hadoop 3.3.1 where if no S3 endpoint is set in fs.s3a.endpoint, S3A filesystem creation may fail on non-EC2 deployments, depending on the local host environment setup. * If fs.s3a.endpoint is empty/null, and fs.s3a.endpoint.region is null, the region is set to "us-east-1". * If fs.s3a.endpoint.region is explicitly set to "" then the client falls back to the SDK region resolution chain; this works on EC2 * Details in troubleshooting.md, including a workaround for Hadoop-3.3.1+ * Also contains some minor restructuring of troubleshooting.md Contributed by Steve Loughran. Change-Id: Ife482cff513307cd52d59eec56beac0a33e031f5
This commit is contained in:
parent
4ca49fcd44
commit
39e6f2d191
@ -1087,4 +1087,10 @@ private Constants() {
|
||||
*/
|
||||
public static final String AWS_REGION = "fs.s3a.endpoint.region";
|
||||
|
||||
/**
|
||||
* The special S3 region which can be used to talk to any bucket.
|
||||
* Value {@value}.
|
||||
*/
|
||||
public static final String AWS_S3_CENTRAL_REGION = "us-east-1";
|
||||
|
||||
}
|
||||
|
@ -22,6 +22,7 @@
|
||||
import java.net.URI;
|
||||
|
||||
import com.amazonaws.ClientConfiguration;
|
||||
import com.amazonaws.SdkClientException;
|
||||
import com.amazonaws.client.builder.AwsClientBuilder;
|
||||
import com.amazonaws.handlers.RequestHandler2;
|
||||
import com.amazonaws.services.s3.AmazonS3;
|
||||
@ -41,10 +42,13 @@
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.conf.Configured;
|
||||
import org.apache.hadoop.fs.s3a.statistics.impl.AwsStatisticsCollector;
|
||||
import org.apache.hadoop.fs.store.LogExactlyOnce;
|
||||
|
||||
import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
|
||||
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CENTRAL_REGION;
|
||||
import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING;
|
||||
import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING_DEFAULT;
|
||||
import static org.apache.hadoop.fs.s3a.S3AUtils.translateException;
|
||||
|
||||
/**
|
||||
* The default {@link S3ClientFactory} implementation.
|
||||
@ -64,6 +68,19 @@ public class DefaultS3ClientFactory extends Configured
|
||||
protected static final Logger LOG =
|
||||
LoggerFactory.getLogger(DefaultS3ClientFactory.class);
|
||||
|
||||
/**
|
||||
* A one-off warning of default region chains in use.
|
||||
*/
|
||||
private static final LogExactlyOnce WARN_OF_DEFAULT_REGION_CHAIN =
|
||||
new LogExactlyOnce(LOG);
|
||||
|
||||
/**
|
||||
* Warning message printed when the SDK Region chain is in use.
|
||||
*/
|
||||
private static final String SDK_REGION_CHAIN_IN_USE =
|
||||
"S3A filesystem client is using"
|
||||
+ " the SDK region resolution chain.";
|
||||
|
||||
/**
|
||||
* Create the client by preparing the AwsConf configuration
|
||||
* and then invoking {@code buildAmazonS3Client()}.
|
||||
@ -94,9 +111,14 @@ public AmazonS3 createS3Client(
|
||||
awsConf.setUserAgentSuffix(parameters.getUserAgentSuffix());
|
||||
}
|
||||
|
||||
return buildAmazonS3Client(
|
||||
awsConf,
|
||||
parameters);
|
||||
try {
|
||||
return buildAmazonS3Client(
|
||||
awsConf,
|
||||
parameters);
|
||||
} catch (SdkClientException e) {
|
||||
// SDK refused to build.
|
||||
throw translateException("creating AWS S3 client", uri.toString(), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -109,6 +131,7 @@ public AmazonS3 createS3Client(
|
||||
* @param awsConf AWS configuration
|
||||
* @param parameters parameters
|
||||
* @return new AmazonS3 client
|
||||
* @throws SdkClientException if the configuration is invalid.
|
||||
*/
|
||||
protected AmazonS3 buildAmazonS3Client(
|
||||
final ClientConfiguration awsConf,
|
||||
@ -141,6 +164,21 @@ protected AmazonS3 buildAmazonS3Client(
|
||||
// no idea what the endpoint is, so tell the SDK
|
||||
// to work it out at the cost of an extra HEAD request
|
||||
b.withForceGlobalBucketAccessEnabled(true);
|
||||
// HADOOP-17771 force set the region so the build process doesn't halt.
|
||||
String region = getConf().getTrimmed(AWS_REGION, AWS_S3_CENTRAL_REGION);
|
||||
LOG.debug("fs.s3a.endpoint.region=\"{}\"", region);
|
||||
if (!region.isEmpty()) {
|
||||
// there's either an explicit region or we have fallen back
|
||||
// to the central one.
|
||||
LOG.debug("Using default endpoint; setting region to {}", region);
|
||||
b.setRegion(region);
|
||||
} else {
|
||||
// no region.
|
||||
// allow this if people really want it; it is OK to rely on this
|
||||
// when deployed in EC2.
|
||||
WARN_OF_DEFAULT_REGION_CHAIN.warn(SDK_REGION_CHAIN_IN_USE);
|
||||
LOG.debug(SDK_REGION_CHAIN_IN_USE);
|
||||
}
|
||||
}
|
||||
final AmazonS3 client = b.build();
|
||||
return client;
|
||||
@ -206,7 +244,7 @@ protected static AmazonS3 configureAmazonS3Client(AmazonS3 s3,
|
||||
createEndpointConfiguration(
|
||||
final String endpoint, final ClientConfiguration awsConf,
|
||||
String awsRegion) {
|
||||
LOG.debug("Creating endpoint configuration for {}", endpoint);
|
||||
LOG.debug("Creating endpoint configuration for \"{}\"", endpoint);
|
||||
if (endpoint == null || endpoint.isEmpty()) {
|
||||
// the default endpoint...we should be using null at this point.
|
||||
LOG.debug("Using default endpoint -no need to generate a configuration");
|
||||
|
@ -121,4 +121,9 @@ private InternalConstants() {
|
||||
*/
|
||||
public static final int DEFAULT_UPLOAD_PART_COUNT_LIMIT = 10000;
|
||||
|
||||
/**
|
||||
* The system property used by the AWS SDK to identify the region.
|
||||
*/
|
||||
public static final String AWS_REGION_SYSPROP = "aws.region";
|
||||
|
||||
}
|
||||
|
@ -438,6 +438,12 @@ you'll need to remove the `profile` prefix from the AWS configuration section he
|
||||
aws_session_token = ...
|
||||
aws_security_token = ...
|
||||
```
|
||||
Note:
|
||||
|
||||
1. The `region` setting is only used if `fs.s3a.endpoint.region` is set to the empty string.
|
||||
1. For the credentials to be available to applications running in a Hadoop cluster, the
|
||||
configuration files MUST be in the `~/.aws/` directory on the local filesystem in
|
||||
all hosts in the cluster.
|
||||
|
||||
### <a name="auth_session"></a> Using Session Credentials with `TemporaryAWSCredentialsProvider`
|
||||
|
||||
@ -802,8 +808,10 @@ options are covered in [Testing](./testing.md).
|
||||
<property>
|
||||
<name>fs.s3a.endpoint.region</name>
|
||||
<description>AWS S3 region for a bucket, which bypasses the parsing of
|
||||
fs.s3a.endpoint to know the region. Would be helpful in avoiding errors
|
||||
while using privateLink URL and explicitly set the bucket region.
|
||||
fs.s3a.endpoint to know the region. Would be helpful in avoiding errors
|
||||
while using privateLink URL and explicitly set the bucket region.
|
||||
If set to a blank string (or 1+ space), falls back to the
|
||||
(potentially brittle) SDK region resolution process.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
@ -18,11 +18,17 @@
|
||||
|
||||
## <a name="introduction"></a> Introduction
|
||||
|
||||
Common problems working with S3 are
|
||||
Common problems working with S3 are:
|
||||
|
||||
1. Classpath setup
|
||||
1. Authentication
|
||||
1. Incorrect configuration
|
||||
1. [Classpath setup](#classpath)
|
||||
1. [Authentication](#authentication)
|
||||
1. [Access Denial](#access_denied)
|
||||
1. [Connectivity Problems](#connectivity)
|
||||
1. [File System Semantics](#semantics)
|
||||
1. [Encryption](#encryption)
|
||||
1. [Other Errors](#other)
|
||||
|
||||
This document also includes some [best practices](#best) to aid troubleshooting.
|
||||
|
||||
|
||||
Troubleshooting IAM Assumed Roles is covered in its
|
||||
@ -572,7 +578,7 @@ S3 sts endpoint and region like the following:
|
||||
|
||||
## <a name="connectivity"></a> Connectivity Problems
|
||||
|
||||
### <a name="bad_endpoint"></a> Error message "The bucket you are attempting to access must be addressed using the specified endpoint"
|
||||
### <a name="bad_endpoint"></a> Error "The bucket you are attempting to access must be addressed using the specified endpoint"
|
||||
|
||||
This surfaces when `fs.s3a.endpoint` is configured to use an S3 service endpoint
|
||||
which is neither the original AWS one, `s3.amazonaws.com` , nor the one where
|
||||
@ -611,6 +617,101 @@ can be used:
|
||||
Using the explicit endpoint for the region is recommended for speed and
|
||||
to use the V4 signing API.
|
||||
|
||||
### <a name="NoRegion"></a> `Unable to find a region via the region provider chain`
|
||||
|
||||
S3A client creation fails, possibly after a pause of some seconds.
|
||||
|
||||
This failure surfaces when _all_ the following conditions are met:
|
||||
|
||||
1. Deployment outside EC2.
|
||||
1. `fs.s3a.endpoint` is unset.
|
||||
1. `fs.s3a.endpoint.region` is set to `""`. (Hadoop 3.3.2+ only)
|
||||
1. Without the file `~/.aws/config` existing or without a region set in it.
|
||||
1. Without the JVM system property `aws.region` declaring a region.
|
||||
1. Without the environment variable `AWS_REGION` declaring a region.
|
||||
|
||||
Stack trace (Hadoop 3.3.1):
|
||||
```
|
||||
Caused by: com.amazonaws.SdkClientException: Unable to find a region via the region provider chain.
|
||||
Must provide an explicit region in the builder or setup environment to supply a region.
|
||||
at com.amazonaws.client.builder.AwsClientBuilder.setRegion(AwsClientBuilder.java:462)
|
||||
at com.amazonaws.client.builder.AwsClientBuilder.configureMutableProperties(AwsClientBuilder.java:424)
|
||||
at com.amazonaws.client.builder.AwsSyncClientBuilder.build(AwsSyncClientBuilder.java:46)
|
||||
at org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.buildAmazonS3Client(DefaultS3ClientFactory.java:145)
|
||||
at org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.createS3Client(DefaultS3ClientFactory.java:97)
|
||||
at org.apache.hadoop.fs.s3a.S3AFileSystem.bindAWSClient(S3AFileSystem.java:788)
|
||||
at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:478)
|
||||
```
|
||||
|
||||
Log and stack trace on later releases, with
|
||||
"S3A filesystem client is using the SDK region resolution chain."
|
||||
warning that the SDK resolution chain is in use:
|
||||
|
||||
```
|
||||
2021-06-23 19:56:55,971 [main] WARN s3a.DefaultS3ClientFactory (LogExactlyOnce.java:warn(39)) -
|
||||
S3A filesystem client is using the SDK region resolution chain.
|
||||
|
||||
2021-06-23 19:56:56,073 [main] WARN fs.FileSystem (FileSystem.java:createFileSystem(3464)) -
|
||||
Failed to initialize fileystem s3a://osm-pds/planet:
|
||||
org.apache.hadoop.fs.s3a.AWSClientIOException: creating AWS S3 client on s3a://osm-pds:
|
||||
com.amazonaws.SdkClientException: Unable to find a region via the region provider chain.
|
||||
Must provide an explicit region in the builder or setup environment to supply a region.:
|
||||
Unable to find a region via the region provider chain.
|
||||
Must provide an explicit region in the builder or setup environment to supply a region.
|
||||
at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:208)
|
||||
at org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.createS3Client(DefaultS3ClientFactory.java:122)
|
||||
at org.apache.hadoop.fs.s3a.S3AFileSystem.bindAWSClient(S3AFileSystem.java:788)
|
||||
at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:478)
|
||||
at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3460)
|
||||
at org.apache.hadoop.fs.FileSystem.access$300(FileSystem.java:172)
|
||||
at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3565)
|
||||
at org.apache.hadoop.fs.FileSystem$Cache.getUnique(FileSystem.java:3518)
|
||||
at org.apache.hadoop.fs.FileSystem.newInstance(FileSystem.java:592)
|
||||
Caused by: com.amazonaws.SdkClientException: Unable to find a region via the region provider chain.
|
||||
Must provide an explicit region in the builder or setup environment to supply a region.
|
||||
at com.amazonaws.client.builder.AwsClientBuilder.setRegion(AwsClientBuilder.java:462)
|
||||
at com.amazonaws.client.builder.AwsClientBuilder.configureMutableProperties(AwsClientBuilder.java:424)
|
||||
at com.amazonaws.client.builder.AwsSyncClientBuilder.build(AwsSyncClientBuilder.java:46)
|
||||
at org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.buildAmazonS3Client(DefaultS3ClientFactory.java:185)
|
||||
at org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.createS3Client(DefaultS3ClientFactory.java:117)
|
||||
... 21 more
|
||||
```
|
||||
|
||||
Due to changes in S3 client construction in Hadoop 3.3.1 this error surfaces in
|
||||
non-EC2 deployments where no AWS endpoint was declared:
|
||||
[HADOOP-17771](https://issues.apache.org/jira/browse/HADOOP-17771). On Hadoop
|
||||
3.3.2 and later it takes active effort to create this stack trace.
|
||||
|
||||
**Fix: set `fs.s3a.endpoint` to `s3.amazonaws.com`**
|
||||
|
||||
Set `fs.s3a.endpoint` to the endpoint where the data is stored
|
||||
(best), or to `s3.amazonaws.com` (second-best).
|
||||
|
||||
```xml
|
||||
<property>
|
||||
<name>fs.s3a.endpoint</name>
|
||||
<value>s3.amazonaws.com</value>
|
||||
</property>
|
||||
```
|
||||
|
||||
For Apache Spark, this can be done in `spark-defaults.conf`
|
||||
|
||||
```
|
||||
spark.hadoop.fs.s3a.endpoint s3.amazonaws.com
|
||||
```
|
||||
|
||||
Or in Scala by editing the spark configuration during setup.
|
||||
|
||||
```scala
|
||||
sc.hadoopConfiguration.set("fs.s3a.endpoint", "s3.amazonaws.com")
|
||||
```
|
||||
|
||||
Tip: set the logging of `org.apache.hadoop.fs.s3a.DefaultS3ClientFactory`
|
||||
to `DEBUG` to see how the endpoint and region configuration is determined.
|
||||
|
||||
```
|
||||
log4j.logger.org.apache.hadoop.fs.s3a.DefaultS3ClientFactory=DEBUG
|
||||
```
|
||||
|
||||
### <a name="timeout_from_pool"></a> "Timeout waiting for connection from pool" when writing data
|
||||
|
||||
@ -792,257 +893,10 @@ Again, we believe this is caused by the connection to S3 being broken.
|
||||
It may go away if the operation is retried.
|
||||
|
||||
|
||||
## <a name="other"></a> Other Errors
|
||||
|
||||
### <a name="integrity"></a> `SdkClientException` Unable to verify integrity of data upload
|
||||
|
||||
Something has happened to the data as it was uploaded.
|
||||
|
||||
```
|
||||
Caused by: org.apache.hadoop.fs.s3a.AWSClientIOException: saving output on dest/_task_tmp.-ext-10000/_tmp.000000_0:
|
||||
com.amazonaws.AmazonClientException: Unable to verify integrity of data upload.
|
||||
Client calculated content hash (contentMD5: L75PalQk0CIhTp04MStVOA== in base 64)
|
||||
didn't match hash (etag: 37ace01f2c383d6b9b3490933c83bb0f in hex) calculated by Amazon S3.
|
||||
You may need to delete the data stored in Amazon S3.
|
||||
(metadata.contentMD5: L75PalQk0CIhTp04MStVOA==, md5DigestStream: null,
|
||||
bucketName: ext2, key: dest/_task_tmp.-ext-10000/_tmp.000000_0):
|
||||
at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:144)
|
||||
at org.apache.hadoop.fs.s3a.S3AOutputStream.close(S3AOutputStream.java:121)
|
||||
at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72)
|
||||
at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:106)
|
||||
at org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat$1.close(HiveIgnoreKeyTextOutputFormat.java:99)
|
||||
at org.apache.hadoop.hive.ql.exec.FileSinkOperator$FSPaths.closeWriters(FileSinkOperator.java:190)
|
||||
... 22 more
|
||||
Caused by: com.amazonaws.AmazonClientException: Unable to verify integrity of data upload.
|
||||
Client calculated content hash (contentMD5: L75PalQk0CIhTp04MStVOA== in base 64)
|
||||
didn't match hash (etag: 37ace01f2c383d6b9b3490933c83bb0f in hex) calculated by Amazon S3.
|
||||
You may need to delete the data stored in Amazon S3.
|
||||
(metadata.contentMD5: L75PalQk0CIhTp04MStVOA==, md5DigestStream: null,
|
||||
bucketName: ext2, key: dest/_task_tmp.-ext-10000/_tmp.000000_0)
|
||||
at com.amazonaws.services.s3.AmazonS3Client.putObject(AmazonS3Client.java:1492)
|
||||
at com.amazonaws.services.s3.transfer.internal.UploadCallable.uploadInOneChunk(UploadCallable.java:131)
|
||||
at com.amazonaws.services.s3.transfer.internal.UploadCallable.call(UploadCallable.java:123)
|
||||
at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:139)
|
||||
at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:47)
|
||||
... 4 more
|
||||
```
|
||||
|
||||
As it uploads data to S3, the AWS SDK builds up an MD5 checksum of what was
|
||||
PUT/POSTed. When S3 returns the checksum of the uploaded data, that is compared
|
||||
with the local checksum. If there is a mismatch, this error is reported.
|
||||
|
||||
The uploaded data is already on S3 and will stay there, though if this happens
|
||||
during a multipart upload, it may not be visible (but still billed: clean up your
|
||||
multipart uploads via the `hadoop s3guard uploads` command).
|
||||
|
||||
Possible causes for this:
|
||||
|
||||
1. A (possibly transient) network problem, including hardware faults.
|
||||
1. A proxy server is doing bad things to the data.
|
||||
1. Some signing problem, especially with third-party S3-compatible object stores.
|
||||
|
||||
This is a very, very rare occurrence.
|
||||
|
||||
If the problem is a signing one, try changing the signature algorithm.
|
||||
|
||||
```xml
|
||||
<property>
|
||||
<name>fs.s3a.signing-algorithm</name>
|
||||
<value>S3SignerType</value>
|
||||
</property>
|
||||
```
|
||||
|
||||
We cannot make any promises that it will work,
|
||||
only that it has been known to make the problem go away "once".
|
||||
|
||||
### `AWSS3IOException` The Content-MD5 you specified did not match what we received
|
||||
|
||||
Reads work, but writes, even `mkdir`, fail:
|
||||
|
||||
```
|
||||
org.apache.hadoop.fs.s3a.AWSS3IOException: copyFromLocalFile(file:/tmp/hello.txt, s3a://bucket/hello.txt)
|
||||
on file:/tmp/hello.txt:
|
||||
The Content-MD5 you specified did not match what we received.
|
||||
(Service: Amazon S3; Status Code: 400; Error Code: BadDigest; Request ID: 4018131225),
|
||||
S3 Extended Request ID: null
|
||||
at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:127)
|
||||
at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:69)
|
||||
at org.apache.hadoop.fs.s3a.S3AFileSystem.copyFromLocalFile(S3AFileSystem.java:1494)
|
||||
at org.apache.hadoop.tools.cloudup.Cloudup.uploadOneFile(Cloudup.java:466)
|
||||
at org.apache.hadoop.tools.cloudup.Cloudup.access$000(Cloudup.java:63)
|
||||
at org.apache.hadoop.tools.cloudup.Cloudup$1.call(Cloudup.java:353)
|
||||
at org.apache.hadoop.tools.cloudup.Cloudup$1.call(Cloudup.java:350)
|
||||
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
|
||||
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
|
||||
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
|
||||
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
|
||||
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
|
||||
at java.lang.Thread.run(Thread.java:748)
|
||||
Caused by: com.amazonaws.services.s3.model.AmazonS3Exception:
|
||||
The Content-MD5 you specified did not match what we received.
|
||||
(Service: Amazon S3; Status Code: 400; Error Code: BadDigest; Request ID: 4018131225),
|
||||
S3 Extended Request ID: null
|
||||
at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1307)
|
||||
at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:894)
|
||||
at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:597)
|
||||
at com.amazonaws.http.AmazonHttpClient.doExecute(AmazonHttpClient.java:363)
|
||||
at com.amazonaws.http.AmazonHttpClient.executeWithTimer(AmazonHttpClient.java:329)
|
||||
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:308)
|
||||
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3659)
|
||||
at com.amazonaws.services.s3.AmazonS3Client.putObject(AmazonS3Client.java:1422)
|
||||
at com.amazonaws.services.s3.transfer.internal.UploadCallable.uploadInOneChunk(UploadCallable.java:131)
|
||||
at com.amazonaws.services.s3.transfer.internal.UploadCallable.call(UploadCallable.java:123)
|
||||
at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:139)
|
||||
at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:47)
|
||||
at org.apache.hadoop.fs.s3a.BlockingThreadPoolExecutorService$CallableWithPermitRelease.call(BlockingThreadPoolExecutorService.java:239)
|
||||
... 4 more
|
||||
```
|
||||
|
||||
This stack trace was seen when interacting with a third-party S3 store whose
|
||||
expectations of headers related to the AWS V4 signing mechanism were not
|
||||
compatible with that of the specific AWS SDK Hadoop was using.
|
||||
|
||||
Workaround: revert to V2 signing.
|
||||
|
||||
```xml
|
||||
<property>
|
||||
<name>fs.s3a.signing-algorithm</name>
|
||||
<value>S3SignerType</value>
|
||||
</property>
|
||||
```
|
||||
|
||||
### When writing data: "java.io.FileNotFoundException: Completing multi-part upload"
|
||||
|
||||
|
||||
A multipart upload was trying to complete, but failed as there was no upload
|
||||
with that ID.
|
||||
|
||||
```
|
||||
java.io.FileNotFoundException: Completing multi-part upload on fork-5/test/multipart/1c397ca6-9dfb-4ac1-9cf7-db666673246b:
|
||||
com.amazonaws.services.s3.model.AmazonS3Exception: The specified upload does not exist.
|
||||
The upload ID may be invalid, or the upload may have been aborted or completed.
|
||||
(Service: Amazon S3; Status Code: 404; Error Code: NoSuchUpload;
|
||||
at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1182)
|
||||
at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:770)
|
||||
at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489)
|
||||
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310)
|
||||
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785)
|
||||
at com.amazonaws.services.s3.AmazonS3Client.completeMultipartUpload(AmazonS3Client.java:2705)
|
||||
at org.apache.hadoop.fs.s3a.S3ABlockOutputStream$MultiPartUpload.complete(S3ABlockOutputStream.java:473)
|
||||
at org.apache.hadoop.fs.s3a.S3ABlockOutputStream$MultiPartUpload.access$200(S3ABlockOutputStream.java:382)
|
||||
at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.close(S3ABlockOutputStream.java:272)
|
||||
at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72)
|
||||
at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:106)
|
||||
```
|
||||
|
||||
This can happen when all outstanding uploads have been aborted, including
|
||||
the active ones.
|
||||
|
||||
If the bucket has a lifecycle policy of deleting multipart uploads, make
|
||||
sure that the expiry time of the deletion is greater than that required
|
||||
for all open writes to complete the write,
|
||||
*and for all jobs using the S3A committers to commit their work.*
|
||||
|
||||
|
||||
### Application hangs after reading a number of files
|
||||
|
||||
|
||||
The pool of https client connections and/or IO threads have been used up,
|
||||
and none are being freed.
|
||||
|
||||
|
||||
1. The pools aren't big enough. See ["Timeout waiting for connection from pool"](#timeout_from_pool)
|
||||
2. Likely root cause: whatever code is reading files isn't calling `close()`
|
||||
on the input streams. Make sure your code does this!
|
||||
And if it's someone else's: make sure you have a recent version; search their
|
||||
issue trackers to see if it's a known/fixed problem.
|
||||
If not, it's time to work with the developers, or come up with a workaround
|
||||
(i.e. closing the input stream yourself).
|
||||
|
||||
|
||||
|
||||
### Issue: when writing data, HTTP Exceptions logged at info from `AmazonHttpClient`
|
||||
|
||||
```
|
||||
[s3a-transfer-shared-pool4-t6] INFO http.AmazonHttpClient (AmazonHttpClient.java:executeHelper(496))
|
||||
- Unable to execute HTTP request: hwdev-steve-ireland-new.s3.amazonaws.com:443 failed to respond
|
||||
org.apache.http.NoHttpResponseException: bucket.s3.amazonaws.com:443 failed to respond
|
||||
at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:143)
|
||||
at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:57)
|
||||
at org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:261)
|
||||
at org.apache.http.impl.AbstractHttpClientConnection.receiveResponseHeader(AbstractHttpClientConnection.java:283)
|
||||
at org.apache.http.impl.conn.DefaultClientConnection.receiveResponseHeader(DefaultClientConnection.java:259)
|
||||
at org.apache.http.impl.conn.ManagedClientConnectionImpl.receiveResponseHeader(ManagedClientConnectionImpl.java:209)
|
||||
at org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:272)
|
||||
at com.amazonaws.http.protocol.SdkHttpRequestExecutor.doReceiveResponse(SdkHttpRequestExecutor.java:66)
|
||||
at org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:124)
|
||||
at org.apache.http.impl.client.DefaultRequestDirector.tryExecute(DefaultRequestDirector.java:686)
|
||||
at org.apache.http.impl.client.DefaultRequestDirector.execute(DefaultRequestDirector.java:488)
|
||||
at org.apache.http.impl.client.AbstractHttpClient.doExecute(AbstractHttpClient.java:884)
|
||||
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82)
|
||||
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55)
|
||||
at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:728)
|
||||
at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489)
|
||||
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310)
|
||||
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785)
|
||||
at com.amazonaws.services.s3.AmazonS3Client.copyPart(AmazonS3Client.java:1731)
|
||||
at com.amazonaws.services.s3.transfer.internal.CopyPartCallable.call(CopyPartCallable.java:41)
|
||||
at com.amazonaws.services.s3.transfer.internal.CopyPartCallable.call(CopyPartCallable.java:28)
|
||||
at org.apache.hadoop.fs.s3a.BlockingThreadPoolExecutorService$CallableWithPermitRelease.call(BlockingThreadPoolExecutorService.java:239)
|
||||
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
|
||||
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
|
||||
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
|
||||
at java.lang.Thread.run(Thread.java:745)
|
||||
```
|
||||
|
||||
These are HTTP I/O exceptions caught and logged inside the AWS SDK. The client
|
||||
will attempt to retry the operation; it may just be a transient event. If there
|
||||
are many such exceptions in logs, it may be a symptom of connectivity or network
|
||||
problems.
|
||||
|
||||
### `AWSBadRequestException` IllegalLocationConstraintException/The unspecified location constraint is incompatible
|
||||
|
||||
```
|
||||
Cause: org.apache.hadoop.fs.s3a.AWSBadRequestException: put on :
|
||||
com.amazonaws.services.s3.model.AmazonS3Exception:
|
||||
The unspecified location constraint is incompatible for the region specific
|
||||
endpoint this request was sent to.
|
||||
(Service: Amazon S3; Status Code: 400; Error Code: IllegalLocationConstraintException;
|
||||
|
||||
at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:178)
|
||||
at org.apache.hadoop.fs.s3a.S3ALambda.execute(S3ALambda.java:64)
|
||||
at org.apache.hadoop.fs.s3a.WriteOperationHelper.uploadObject(WriteOperationHelper.java:451)
|
||||
at org.apache.hadoop.fs.s3a.commit.magic.MagicCommitTracker.aboutToComplete(MagicCommitTracker.java:128)
|
||||
at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.close(S3ABlockOutputStream.java:373)
|
||||
at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72)
|
||||
at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:101)
|
||||
at org.apache.hadoop.hive.ql.io.orc.WriterImpl.close(WriterImpl.java:2429)
|
||||
at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$OrcRecordWriter.close(OrcOutputFormat.java:106)
|
||||
at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$OrcRecordWriter.close(OrcOutputFormat.java:91)
|
||||
...
|
||||
Cause: com.amazonaws.services.s3.model.AmazonS3Exception:
|
||||
The unspecified location constraint is incompatible for the region specific endpoint
|
||||
this request was sent to. (Service: Amazon S3; Status Code: 400; Error Code: IllegalLocationConstraintException;
|
||||
Request ID: EEBC5A08BCB3A645)
|
||||
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1588)
|
||||
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1258)
|
||||
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1030)
|
||||
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:742)
|
||||
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:716)
|
||||
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:699)
|
||||
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:667)
|
||||
at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:649)
|
||||
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:513)
|
||||
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4221)
|
||||
...
|
||||
```
|
||||
|
||||
Something has been trying to write data to "/".
|
||||
|
||||
## File System Semantics
|
||||
## <a name="semantics"></a>File System Semantics
|
||||
|
||||
These are the issues where S3 does not appear to behave the way a filesystem
|
||||
"should".
|
||||
"should". That's because it "isn't".
|
||||
|
||||
|
||||
### File not visible/saved
|
||||
@ -1185,7 +1039,7 @@ We also recommend using applications/application
|
||||
options which do not rename files when committing work or when copying data
|
||||
to S3, but instead write directly to the final destination.
|
||||
|
||||
## Rename not behaving as "expected"
|
||||
### Rename not behaving as "expected"
|
||||
|
||||
S3 is not a filesystem. The S3A connector mimics file and directory rename by
|
||||
|
||||
@ -1303,7 +1157,7 @@ is used, no encryption is specified, or the SSE-C specified is incorrect.
|
||||
2. A directory is encrypted with a SSE-C keyA and the user is trying to move a
|
||||
file using configured SSE-C keyB into that structure.
|
||||
|
||||
## <a name="not_all_bytes_were_read"></a> Message appears in logs "Not all bytes were read from the S3ObjectInputStream"
|
||||
### <a name="not_all_bytes_were_read"></a> Message appears in logs "Not all bytes were read from the S3ObjectInputStream"
|
||||
|
||||
|
||||
This is a message which can be generated by the Amazon SDK when the client application
|
||||
@ -1378,8 +1232,250 @@ The specified bucket does not exist
|
||||
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1367)
|
||||
```
|
||||
|
||||
## <a name="other"></a> Other Errors
|
||||
|
||||
## Other Issues
|
||||
### <a name="integrity"></a> `SdkClientException` Unable to verify integrity of data upload
|
||||
|
||||
Something has happened to the data as it was uploaded.
|
||||
|
||||
```
|
||||
Caused by: org.apache.hadoop.fs.s3a.AWSClientIOException: saving output on dest/_task_tmp.-ext-10000/_tmp.000000_0:
|
||||
com.amazonaws.AmazonClientException: Unable to verify integrity of data upload.
|
||||
Client calculated content hash (contentMD5: L75PalQk0CIhTp04MStVOA== in base 64)
|
||||
didn't match hash (etag: 37ace01f2c383d6b9b3490933c83bb0f in hex) calculated by Amazon S3.
|
||||
You may need to delete the data stored in Amazon S3.
|
||||
(metadata.contentMD5: L75PalQk0CIhTp04MStVOA==, md5DigestStream: null,
|
||||
bucketName: ext2, key: dest/_task_tmp.-ext-10000/_tmp.000000_0):
|
||||
at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:144)
|
||||
at org.apache.hadoop.fs.s3a.S3AOutputStream.close(S3AOutputStream.java:121)
|
||||
at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72)
|
||||
at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:106)
|
||||
at org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat$1.close(HiveIgnoreKeyTextOutputFormat.java:99)
|
||||
at org.apache.hadoop.hive.ql.exec.FileSinkOperator$FSPaths.closeWriters(FileSinkOperator.java:190)
|
||||
... 22 more
|
||||
Caused by: com.amazonaws.AmazonClientException: Unable to verify integrity of data upload.
|
||||
Client calculated content hash (contentMD5: L75PalQk0CIhTp04MStVOA== in base 64)
|
||||
didn't match hash (etag: 37ace01f2c383d6b9b3490933c83bb0f in hex) calculated by Amazon S3.
|
||||
You may need to delete the data stored in Amazon S3.
|
||||
(metadata.contentMD5: L75PalQk0CIhTp04MStVOA==, md5DigestStream: null,
|
||||
bucketName: ext2, key: dest/_task_tmp.-ext-10000/_tmp.000000_0)
|
||||
at com.amazonaws.services.s3.AmazonS3Client.putObject(AmazonS3Client.java:1492)
|
||||
at com.amazonaws.services.s3.transfer.internal.UploadCallable.uploadInOneChunk(UploadCallable.java:131)
|
||||
at com.amazonaws.services.s3.transfer.internal.UploadCallable.call(UploadCallable.java:123)
|
||||
at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:139)
|
||||
at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:47)
|
||||
... 4 more
|
||||
```
|
||||
|
||||
As it uploads data to S3, the AWS SDK builds up an MD5 checksum of what was
|
||||
PUT/POSTed. When S3 returns the checksum of the uploaded data, that is compared
|
||||
with the local checksum. If there is a mismatch, this error is reported.
|
||||
|
||||
The uploaded data is already on S3 and will stay there, though if this happens
|
||||
during a multipart upload, it may not be visible (but still billed: clean up
|
||||
your multipart uploads via the `hadoop s3guard uploads` command).
|
||||
|
||||
Possible causes for this
|
||||
|
||||
1. A (possibly transient) network problem, including hardware faults.
|
||||
1. A proxy server is doing bad things to the data.
|
||||
1. Some signing problem, especially with third-party S3-compatible object
|
||||
stores.
|
||||
|
||||
This is a very, very rare occurrence.
|
||||
|
||||
If the problem is a signing one, try changing the signature algorithm.
|
||||
|
||||
```xml
|
||||
<property>
|
||||
<name>fs.s3a.signing-algorithm</name>
|
||||
<value>S3SignerType</value>
|
||||
</property>
|
||||
```
|
||||
|
||||
We cannot make any promises that it will work, only that it has been known to
|
||||
make the problem go away "once".
|
||||
|
||||
### `AWSS3IOException` The Content-MD5 you specified did not match what we received
|
||||
|
||||
Reads work, but writes, even `mkdir`, fail:
|
||||
|
||||
```
|
||||
org.apache.hadoop.fs.s3a.AWSS3IOException: copyFromLocalFile(file:/tmp/hello.txt, s3a://bucket/hello.txt)
|
||||
on file:/tmp/hello.txt:
|
||||
The Content-MD5 you specified did not match what we received.
|
||||
(Service: Amazon S3; Status Code: 400; Error Code: BadDigest; Request ID: 4018131225),
|
||||
S3 Extended Request ID: null
|
||||
at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:127)
|
||||
at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:69)
|
||||
at org.apache.hadoop.fs.s3a.S3AFileSystem.copyFromLocalFile(S3AFileSystem.java:1494)
|
||||
at org.apache.hadoop.tools.cloudup.Cloudup.uploadOneFile(Cloudup.java:466)
|
||||
at org.apache.hadoop.tools.cloudup.Cloudup.access$000(Cloudup.java:63)
|
||||
at org.apache.hadoop.tools.cloudup.Cloudup$1.call(Cloudup.java:353)
|
||||
at org.apache.hadoop.tools.cloudup.Cloudup$1.call(Cloudup.java:350)
|
||||
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
|
||||
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
|
||||
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
|
||||
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
|
||||
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
|
||||
at java.lang.Thread.run(Thread.java:748)
|
||||
Caused by: com.amazonaws.services.s3.model.AmazonS3Exception:
|
||||
The Content-MD5 you specified did not match what we received.
|
||||
(Service: Amazon S3; Status Code: 400; Error Code: BadDigest; Request ID: 4018131225),
|
||||
S3 Extended Request ID: null
|
||||
at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1307)
|
||||
at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:894)
|
||||
at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:597)
|
||||
at com.amazonaws.http.AmazonHttpClient.doExecute(AmazonHttpClient.java:363)
|
||||
at com.amazonaws.http.AmazonHttpClient.executeWithTimer(AmazonHttpClient.java:329)
|
||||
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:308)
|
||||
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3659)
|
||||
at com.amazonaws.services.s3.AmazonS3Client.putObject(AmazonS3Client.java:1422)
|
||||
at com.amazonaws.services.s3.transfer.internal.UploadCallable.uploadInOneChunk(UploadCallable.java:131)
|
||||
at com.amazonaws.services.s3.transfer.internal.UploadCallable.call(UploadCallable.java:123)
|
||||
at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:139)
|
||||
at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:47)
|
||||
at org.apache.hadoop.fs.s3a.BlockingThreadPoolExecutorService$CallableWithPermitRelease.call(BlockingThreadPoolExecutorService.java:239)
|
||||
... 4 more
|
||||
```
|
||||
|
||||
This stack trace was seen when interacting with a third-party S3 store whose
|
||||
expectations of headers related to the AWS V4 signing mechanism were not
|
||||
compatible with that of the specific AWS SDK Hadoop was using.
|
||||
|
||||
Workaround: revert to V2 signing.
|
||||
|
||||
```xml
|
||||
<property>
|
||||
<name>fs.s3a.signing-algorithm</name>
|
||||
<value>S3SignerType</value>
|
||||
</property>
|
||||
```
|
||||
|
||||
### When writing data: "java.io.FileNotFoundException: Completing multi-part upload"
|
||||
|
||||
A multipart upload was trying to complete, but failed as there was no upload
|
||||
with that ID.
|
||||
|
||||
```
|
||||
java.io.FileNotFoundException: Completing multi-part upload on fork-5/test/multipart/1c397ca6-9dfb-4ac1-9cf7-db666673246b:
|
||||
com.amazonaws.services.s3.model.AmazonS3Exception: The specified upload does not exist.
|
||||
The upload ID may be invalid, or the upload may have been aborted or completed.
|
||||
(Service: Amazon S3; Status Code: 404; Error Code: NoSuchUpload;
|
||||
at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1182)
|
||||
at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:770)
|
||||
at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489)
|
||||
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310)
|
||||
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785)
|
||||
at com.amazonaws.services.s3.AmazonS3Client.completeMultipartUpload(AmazonS3Client.java:2705)
|
||||
at org.apache.hadoop.fs.s3a.S3ABlockOutputStream$MultiPartUpload.complete(S3ABlockOutputStream.java:473)
|
||||
at org.apache.hadoop.fs.s3a.S3ABlockOutputStream$MultiPartUpload.access$200(S3ABlockOutputStream.java:382)
|
||||
at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.close(S3ABlockOutputStream.java:272)
|
||||
at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72)
|
||||
at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:106)
|
||||
```
|
||||
|
||||
This can happen when all outstanding uploads have been aborted, including the
|
||||
active ones.
|
||||
|
||||
If the bucket has a lifecycle policy of deleting multipart uploads, make sure
|
||||
that the expiry time of the deletion is greater than that required for all open
|
||||
writes to complete the write,
|
||||
*and for all jobs using the S3A committers to commit their work.*
|
||||
|
||||
### Application hangs after reading a number of files
|
||||
|
||||
The pool of https client connections and/or IO threads have been used up, and
|
||||
none are being freed.
|
||||
|
||||
1. The pools aren't big enough.
|
||||
See ["Timeout waiting for connection from pool"](#timeout_from_pool)
|
||||
2. Likely root cause: whatever code is reading files isn't calling `close()`
|
||||
on the input streams. Make sure your code does this!
|
||||
And if it's someone else's: make sure you have a recent version; search their
|
||||
issue trackers to see if it's a known/fixed problem. If not, it's time to work
|
||||
with the developers, or come up with a workaround
|
||||
(i.e. closing the input stream yourself).
|
||||
|
||||
### Issue: when writing data, HTTP Exceptions logged at info from `AmazonHttpClient`
|
||||
|
||||
```
|
||||
[s3a-transfer-shared-pool4-t6] INFO http.AmazonHttpClient (AmazonHttpClient.java:executeHelper(496))
|
||||
- Unable to execute HTTP request: hwdev-steve-ireland-new.s3.amazonaws.com:443 failed to respond
|
||||
org.apache.http.NoHttpResponseException: bucket.s3.amazonaws.com:443 failed to respond
|
||||
at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:143)
|
||||
at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:57)
|
||||
at org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:261)
|
||||
at org.apache.http.impl.AbstractHttpClientConnection.receiveResponseHeader(AbstractHttpClientConnection.java:283)
|
||||
at org.apache.http.impl.conn.DefaultClientConnection.receiveResponseHeader(DefaultClientConnection.java:259)
|
||||
at org.apache.http.impl.conn.ManagedClientConnectionImpl.receiveResponseHeader(ManagedClientConnectionImpl.java:209)
|
||||
at org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:272)
|
||||
at com.amazonaws.http.protocol.SdkHttpRequestExecutor.doReceiveResponse(SdkHttpRequestExecutor.java:66)
|
||||
at org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:124)
|
||||
at org.apache.http.impl.client.DefaultRequestDirector.tryExecute(DefaultRequestDirector.java:686)
|
||||
at org.apache.http.impl.client.DefaultRequestDirector.execute(DefaultRequestDirector.java:488)
|
||||
at org.apache.http.impl.client.AbstractHttpClient.doExecute(AbstractHttpClient.java:884)
|
||||
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82)
|
||||
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55)
|
||||
at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:728)
|
||||
at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489)
|
||||
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310)
|
||||
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785)
|
||||
at com.amazonaws.services.s3.AmazonS3Client.copyPart(AmazonS3Client.java:1731)
|
||||
at com.amazonaws.services.s3.transfer.internal.CopyPartCallable.call(CopyPartCallable.java:41)
|
||||
at com.amazonaws.services.s3.transfer.internal.CopyPartCallable.call(CopyPartCallable.java:28)
|
||||
at org.apache.hadoop.fs.s3a.BlockingThreadPoolExecutorService$CallableWithPermitRelease.call(BlockingThreadPoolExecutorService.java:239)
|
||||
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
|
||||
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
|
||||
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
|
||||
at java.lang.Thread.run(Thread.java:745)
|
||||
```
|
||||
|
||||
These are HTTP I/O exceptions caught and logged inside the AWS SDK. The client
|
||||
will attempt to retry the operation; it may just be a transient event. If there
|
||||
are many such exceptions in logs, it may be a symptom of connectivity or network
|
||||
problems.
|
||||
|
||||
### `AWSBadRequestException` IllegalLocationConstraintException/The unspecified location constraint is incompatible
|
||||
|
||||
```
|
||||
Cause: org.apache.hadoop.fs.s3a.AWSBadRequestException: put on :
|
||||
com.amazonaws.services.s3.model.AmazonS3Exception:
|
||||
The unspecified location constraint is incompatible for the region specific
|
||||
endpoint this request was sent to.
|
||||
(Service: Amazon S3; Status Code: 400; Error Code: IllegalLocationConstraintException;
|
||||
|
||||
at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:178)
|
||||
at org.apache.hadoop.fs.s3a.S3ALambda.execute(S3ALambda.java:64)
|
||||
at org.apache.hadoop.fs.s3a.WriteOperationHelper.uploadObject(WriteOperationHelper.java:451)
|
||||
at org.apache.hadoop.fs.s3a.commit.magic.MagicCommitTracker.aboutToComplete(MagicCommitTracker.java:128)
|
||||
at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.close(S3ABlockOutputStream.java:373)
|
||||
at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72)
|
||||
at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:101)
|
||||
at org.apache.hadoop.hive.ql.io.orc.WriterImpl.close(WriterImpl.java:2429)
|
||||
at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$OrcRecordWriter.close(OrcOutputFormat.java:106)
|
||||
at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$OrcRecordWriter.close(OrcOutputFormat.java:91)
|
||||
...
|
||||
Cause: com.amazonaws.services.s3.model.AmazonS3Exception:
|
||||
The unspecified location constraint is incompatible for the region specific endpoint
|
||||
this request was sent to. (Service: Amazon S3; Status Code: 400; Error Code: IllegalLocationConstraintException;
|
||||
Request ID: EEBC5A08BCB3A645)
|
||||
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1588)
|
||||
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1258)
|
||||
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1030)
|
||||
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:742)
|
||||
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:716)
|
||||
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:699)
|
||||
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:667)
|
||||
at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:649)
|
||||
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:513)
|
||||
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4221)
|
||||
...
|
||||
```
|
||||
|
||||
Something has been trying to write data to "/".
|
||||
|
||||
## <a name="best"></a> Best Practises
|
||||
|
||||
### <a name="logging"></a> Enabling low-level logging
|
||||
|
||||
@ -1444,10 +1540,20 @@ http.headers (LoggingManagedHttpClientConnection.java:onResponseReceived(127)) -
|
||||
http.headers (LoggingManagedHttpClientConnection.java:onResponseReceived(127)) - http-outgoing-0 << Content-Length: 0
|
||||
http.headers (LoggingManagedHttpClientConnection.java:onResponseReceived(127)) - http-outgoing-0 << Server: AmazonS3
|
||||
execchain.MainClientExec (MainClientExec.java:execute(284)) - Connection can be kept alive for 60000 MILLISECONDS
|
||||
|
||||
```
|
||||
|
||||
### <a name="audit-logging"></a> Enable S3 Server-side Logging
|
||||
|
||||
## <a name="retries"></a> Reducing failures by configuring retry policy
|
||||
The [Auditing](auditing) feature of the S3A connector can be used to generate
|
||||
S3 Server Logs with information which can be used to debug problems
|
||||
working with S3, such as throttling events.
|
||||
|
||||
Consult the [auditing documentation](auditing).
|
||||
As auditing is enabled by default, enabling S3 Logging for a bucket
|
||||
should be sufficient to collect these logs.
|
||||
|
||||
### <a name="retries"></a> Reducing failures by configuring retry policy
|
||||
|
||||
The S3A client can be configured to retry those operations which are considered
|
||||
retryable. That can be because they are idempotent, or
|
||||
|
@ -18,13 +18,25 @@
|
||||
|
||||
package org.apache.hadoop.fs.s3a;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
|
||||
import com.amazonaws.ClientConfiguration;
|
||||
import com.amazonaws.client.builder.AwsClientBuilder;
|
||||
import com.amazonaws.services.s3.AmazonS3;
|
||||
import com.amazonaws.util.AwsHostNameUtils;
|
||||
import org.assertj.core.api.Assertions;
|
||||
import org.junit.Test;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext;
|
||||
|
||||
import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
|
||||
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CENTRAL_REGION;
|
||||
import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ENDPOINT;
|
||||
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.AWS_REGION_SYSPROP;
|
||||
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
|
||||
|
||||
/**
|
||||
* Test to check correctness of S3A endpoint regions in
|
||||
@ -36,6 +48,7 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase {
|
||||
private static final String AWS_ENDPOINT_TEST = "test-endpoint";
|
||||
private static final String AWS_ENDPOINT_TEST_WITH_REGION =
|
||||
"test-endpoint.some-region.amazonaws.com";
|
||||
public static final String MARS_NORTH_2 = "mars-north-2";
|
||||
|
||||
/**
|
||||
* Test to verify that setting a region with the config would bypass the
|
||||
@ -88,4 +101,90 @@ private AwsClientBuilder.EndpointConfiguration createEpr(String endpoint,
|
||||
return DefaultS3ClientFactory.createEndpointConfiguration(endpoint,
|
||||
new ClientConfiguration(), awsRegion);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testInvalidRegionDefaultEndpoint() throws Throwable {
|
||||
describe("Create a client with an invalid region and the default endpoint");
|
||||
Configuration conf = getConfiguration();
|
||||
// we are making a big assumption about the timetable for AWS
|
||||
// region rollout.
|
||||
// if this test ever fails because this region now exists
|
||||
// -congratulations!
|
||||
conf.set(AWS_REGION, MARS_NORTH_2);
|
||||
createMarsNorth2Client(conf);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnsetRegionDefaultEndpoint() throws Throwable {
|
||||
describe("Create a client with no region and the default endpoint");
|
||||
Configuration conf = getConfiguration();
|
||||
conf.unset(AWS_REGION);
|
||||
createS3Client(conf, DEFAULT_ENDPOINT, AWS_S3_CENTRAL_REGION);
|
||||
}
|
||||
|
||||
/**
|
||||
* By setting the system property {@code "aws.region"} we can
|
||||
* guarantee that the SDK region resolution chain will always succeed
|
||||
* (and fast).
|
||||
* Clearly there is no validation of the region during the build process.
|
||||
*/
|
||||
@Test
|
||||
public void testBlankRegionTriggersSDKResolution() throws Throwable {
|
||||
describe("Create a client with a blank region and the default endpoint."
|
||||
+ " This will trigger the SDK Resolution chain");
|
||||
Configuration conf = getConfiguration();
|
||||
conf.set(AWS_REGION, "");
|
||||
System.setProperty(AWS_REGION_SYSPROP, MARS_NORTH_2);
|
||||
try {
|
||||
createMarsNorth2Client(conf);
|
||||
} finally {
|
||||
System.clearProperty(AWS_REGION_SYSPROP);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an S3 client bonded to an invalid region;
|
||||
* verify that calling {@code getRegion()} triggers
|
||||
* a failure.
|
||||
* @param conf configuration to use in the building.
|
||||
*/
|
||||
private void createMarsNorth2Client(Configuration conf) throws Exception {
|
||||
AmazonS3 client = createS3Client(conf, DEFAULT_ENDPOINT, MARS_NORTH_2);
|
||||
intercept(IllegalArgumentException.class, MARS_NORTH_2, client::getRegion);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an S3 client with the given conf and endpoint.
|
||||
* The region name must then match that of the expected
|
||||
* value.
|
||||
* @param conf configuration to use.
|
||||
* @param endpoint endpoint.
|
||||
* @param expectedRegion expected region
|
||||
* @return the client.
|
||||
* @throws URISyntaxException parse problems.
|
||||
* @throws IOException IO problems
|
||||
*/
|
||||
private AmazonS3 createS3Client(Configuration conf,
|
||||
String endpoint,
|
||||
String expectedRegion)
|
||||
throws URISyntaxException, IOException {
|
||||
|
||||
DefaultS3ClientFactory factory
|
||||
= new DefaultS3ClientFactory();
|
||||
factory.setConf(conf);
|
||||
S3ClientFactory.S3ClientCreationParameters parameters
|
||||
= new S3ClientFactory.S3ClientCreationParameters()
|
||||
.withCredentialSet(new AnonymousAWSCredentialsProvider())
|
||||
.withEndpoint(endpoint)
|
||||
.withMetrics(new EmptyS3AStatisticsContext()
|
||||
.newStatisticsFromAwsSdk());
|
||||
AmazonS3 client = factory.createS3Client(
|
||||
new URI("s3a://localhost/"),
|
||||
parameters);
|
||||
Assertions.assertThat(client.getRegionName())
|
||||
.describedAs("Client region name")
|
||||
.isEqualTo(expectedRegion);
|
||||
return client;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user