HADOOP-15563. S3Guard to support creating on-demand DDB tables.
Contributed by Steve Loughran Change-Id: I2262b5b9f52e42ded8ed6f50fd39756f96e77087
This commit is contained in:
parent
85479577da
commit
4e38dafde4
@ -1581,23 +1581,27 @@
|
||||
|
||||
<property>
|
||||
<name>fs.s3a.s3guard.ddb.table.capacity.read</name>
|
||||
<value>500</value>
|
||||
<value>0</value>
|
||||
<description>
|
||||
Provisioned throughput requirements for read operations in terms of capacity
|
||||
units for the DynamoDB table. This config value will only be used when
|
||||
creating a new DynamoDB table, though later you can manually provision by
|
||||
increasing or decreasing read capacity as needed for existing tables.
|
||||
See DynamoDB documents for more information.
|
||||
units for the DynamoDB table. This config value will only be used when
|
||||
creating a new DynamoDB table.
|
||||
If set to 0 (the default), new tables are created with "per-request" capacity.
|
||||
If a positive integer is provided for this and the write capacity, then
|
||||
a table with "provisioned capacity" will be created.
|
||||
You can change the capacity of an existing provisioned-capacity table
|
||||
through the "s3guard set-capacity" command.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3a.s3guard.ddb.table.capacity.write</name>
|
||||
<value>100</value>
|
||||
<value>0</value>
|
||||
<description>
|
||||
Provisioned throughput requirements for write operations in terms of
|
||||
capacity units for the DynamoDB table. Refer to related config
|
||||
fs.s3a.s3guard.ddb.table.capacity.read before usage.
|
||||
capacity units for the DynamoDB table.
|
||||
If set to 0 (the default), new tables are created with "per-request" capacity.
|
||||
Refer to related configuration option fs.s3a.s3guard.ddb.table.capacity.read
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
@ -439,7 +439,6 @@ private Constants() {
|
||||
* This config has no default value. If the user does not set this, the
|
||||
* S3Guard will operate table in the associated S3 bucket region.
|
||||
*/
|
||||
@InterfaceStability.Unstable
|
||||
public static final String S3GUARD_DDB_REGION_KEY =
|
||||
"fs.s3a.s3guard.ddb.region";
|
||||
|
||||
@ -449,7 +448,6 @@ private Constants() {
|
||||
* This config has no default value. If the user does not set this, the
|
||||
* S3Guard implementation will use the respective S3 bucket name.
|
||||
*/
|
||||
@InterfaceStability.Unstable
|
||||
public static final String S3GUARD_DDB_TABLE_NAME_KEY =
|
||||
"fs.s3a.s3guard.ddb.table";
|
||||
|
||||
@ -459,36 +457,45 @@ private Constants() {
|
||||
* For example:
|
||||
* fs.s3a.s3guard.ddb.table.tag.mytag
|
||||
*/
|
||||
@InterfaceStability.Unstable
|
||||
public static final String S3GUARD_DDB_TABLE_TAG =
|
||||
"fs.s3a.s3guard.ddb.table.tag.";
|
||||
|
||||
/**
|
||||
* Test table name to use during DynamoDB integration test.
|
||||
*
|
||||
* The table will be modified, and deleted in the end of the tests.
|
||||
* If this value is not set, the integration tests that would be destructive
|
||||
* won't run.
|
||||
*/
|
||||
@InterfaceStability.Unstable
|
||||
public static final String S3GUARD_DDB_TEST_TABLE_NAME_KEY =
|
||||
"fs.s3a.s3guard.ddb.test.table";
|
||||
|
||||
/**
|
||||
* Whether to create the DynamoDB table if the table does not exist.
|
||||
* Value: {@value}.
|
||||
*/
|
||||
@InterfaceStability.Unstable
|
||||
public static final String S3GUARD_DDB_TABLE_CREATE_KEY =
|
||||
"fs.s3a.s3guard.ddb.table.create";
|
||||
|
||||
@InterfaceStability.Unstable
|
||||
/**
|
||||
* Read capacity when creating a table.
|
||||
* When it and the write capacity are both "0", a per-request table is
|
||||
* created.
|
||||
* Value: {@value}.
|
||||
*/
|
||||
public static final String S3GUARD_DDB_TABLE_CAPACITY_READ_KEY =
|
||||
"fs.s3a.s3guard.ddb.table.capacity.read";
|
||||
public static final long S3GUARD_DDB_TABLE_CAPACITY_READ_DEFAULT = 500;
|
||||
@InterfaceStability.Unstable
|
||||
|
||||
/**
|
||||
* Default read capacity when creating a table.
|
||||
* Value: {@value}.
|
||||
*/
|
||||
public static final long S3GUARD_DDB_TABLE_CAPACITY_READ_DEFAULT = 0;
|
||||
|
||||
/**
|
||||
* Write capacity when creating a table.
|
||||
* When it and the read capacity are both "0", a per-request table is
|
||||
* created.
|
||||
* Value: {@value}.
|
||||
*/
|
||||
public static final String S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY =
|
||||
"fs.s3a.s3guard.ddb.table.capacity.write";
|
||||
public static final long S3GUARD_DDB_TABLE_CAPACITY_WRITE_DEFAULT = 100;
|
||||
|
||||
/**
|
||||
* Default write capacity when creating a table.
|
||||
* Value: {@value}.
|
||||
*/
|
||||
public static final long S3GUARD_DDB_TABLE_CAPACITY_WRITE_DEFAULT = 0;
|
||||
|
||||
/**
|
||||
* The maximum put or delete requests per BatchWriteItem request.
|
||||
@ -497,7 +504,6 @@ private Constants() {
|
||||
*/
|
||||
public static final int S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT = 25;
|
||||
|
||||
@InterfaceStability.Unstable
|
||||
public static final String S3GUARD_DDB_MAX_RETRIES =
|
||||
"fs.s3a.s3guard.ddb.max.retries";
|
||||
|
||||
@ -509,7 +515,6 @@ private Constants() {
|
||||
public static final int S3GUARD_DDB_MAX_RETRIES_DEFAULT =
|
||||
DEFAULT_MAX_ERROR_RETRIES;
|
||||
|
||||
@InterfaceStability.Unstable
|
||||
public static final String S3GUARD_DDB_THROTTLE_RETRY_INTERVAL =
|
||||
"fs.s3a.s3guard.ddb.throttle.retry.interval";
|
||||
public static final String S3GUARD_DDB_THROTTLE_RETRY_INTERVAL_DEFAULT =
|
||||
@ -528,7 +533,6 @@ private Constants() {
|
||||
/**
|
||||
* The default "Null" metadata store: {@value}.
|
||||
*/
|
||||
@InterfaceStability.Unstable
|
||||
public static final String S3GUARD_METASTORE_NULL
|
||||
= "org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore";
|
||||
|
||||
@ -561,7 +565,6 @@ private Constants() {
|
||||
/**
|
||||
* Use DynamoDB for the metadata: {@value}.
|
||||
*/
|
||||
@InterfaceStability.Unstable
|
||||
public static final String S3GUARD_METASTORE_DYNAMO
|
||||
= "org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore";
|
||||
|
||||
|
@ -59,6 +59,7 @@
|
||||
import com.amazonaws.services.dynamodbv2.document.spec.GetItemSpec;
|
||||
import com.amazonaws.services.dynamodbv2.document.spec.QuerySpec;
|
||||
import com.amazonaws.services.dynamodbv2.document.utils.ValueMap;
|
||||
import com.amazonaws.services.dynamodbv2.model.BillingMode;
|
||||
import com.amazonaws.services.dynamodbv2.model.CreateTableRequest;
|
||||
import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughput;
|
||||
import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputDescription;
|
||||
@ -1259,11 +1260,26 @@ void initTable() throws IOException {
|
||||
tableName, region, (created != null) ? new Date(created) : null);
|
||||
} catch (ResourceNotFoundException rnfe) {
|
||||
if (conf.getBoolean(S3GUARD_DDB_TABLE_CREATE_KEY, false)) {
|
||||
final ProvisionedThroughput capacity = new ProvisionedThroughput(
|
||||
conf.getLong(S3GUARD_DDB_TABLE_CAPACITY_READ_KEY,
|
||||
S3GUARD_DDB_TABLE_CAPACITY_READ_DEFAULT),
|
||||
conf.getLong(S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY,
|
||||
S3GUARD_DDB_TABLE_CAPACITY_WRITE_DEFAULT));
|
||||
long readCapacity = conf.getLong(S3GUARD_DDB_TABLE_CAPACITY_READ_KEY,
|
||||
S3GUARD_DDB_TABLE_CAPACITY_READ_DEFAULT);
|
||||
long writeCapacity = conf.getLong(
|
||||
S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY,
|
||||
S3GUARD_DDB_TABLE_CAPACITY_WRITE_DEFAULT);
|
||||
ProvisionedThroughput capacity;
|
||||
if (readCapacity > 0 && writeCapacity > 0) {
|
||||
capacity = new ProvisionedThroughput(
|
||||
readCapacity,
|
||||
writeCapacity);
|
||||
} else {
|
||||
// at least one capacity value is <= 0
|
||||
// verify they are both exactly zero
|
||||
Preconditions.checkArgument(
|
||||
readCapacity == 0 && writeCapacity == 0,
|
||||
"S3Guard table read capacity %d and and write capacity %d"
|
||||
+ " are inconsistent", readCapacity, writeCapacity);
|
||||
// and set the capacity to null for per-request billing.
|
||||
capacity = null;
|
||||
}
|
||||
|
||||
createTable(capacity);
|
||||
} else {
|
||||
@ -1403,20 +1419,31 @@ private void waitForTableActive(Table t) throws IOException {
|
||||
* marker.
|
||||
* Creating and setting up the table isn't wrapped by any retry operations;
|
||||
* the wait for a table to become available is RetryTranslated.
|
||||
* @param capacity capacity to provision
|
||||
* @param capacity capacity to provision. If null: create a per-request
|
||||
* table.
|
||||
* @throws IOException on any failure.
|
||||
* @throws InterruptedIOException if the wait was interrupted
|
||||
*/
|
||||
@Retries.OnceRaw
|
||||
private void createTable(ProvisionedThroughput capacity) throws IOException {
|
||||
try {
|
||||
LOG.info("Creating non-existent DynamoDB table {} in region {}",
|
||||
tableName, region);
|
||||
table = dynamoDB.createTable(new CreateTableRequest()
|
||||
String mode;
|
||||
CreateTableRequest request = new CreateTableRequest()
|
||||
.withTableName(tableName)
|
||||
.withKeySchema(keySchema())
|
||||
.withAttributeDefinitions(attributeDefinitions())
|
||||
.withProvisionedThroughput(capacity));
|
||||
.withAttributeDefinitions(attributeDefinitions());
|
||||
if (capacity != null) {
|
||||
mode = String.format("with provisioned read capacity %d and"
|
||||
+ " write capacity %s",
|
||||
capacity.getReadCapacityUnits(), capacity.getWriteCapacityUnits());
|
||||
request.withProvisionedThroughput(capacity);
|
||||
} else {
|
||||
mode = "with pay-per-request billing";
|
||||
request.withBillingMode(BillingMode.PAY_PER_REQUEST);
|
||||
}
|
||||
LOG.info("Creating non-existent DynamoDB table {} in region {} {}",
|
||||
tableName, region, mode);
|
||||
table = dynamoDB.createTable(request);
|
||||
LOG.debug("Awaiting table becoming active");
|
||||
} catch (ResourceInUseException e) {
|
||||
LOG.warn("ResourceInUseException while creating DynamoDB table {} "
|
||||
@ -1446,13 +1473,21 @@ private PutItemOutcome putItem(Item item) {
|
||||
* Provision the table with given read and write capacity units.
|
||||
* Call will fail if the table is busy, or the new values match the current
|
||||
* ones.
|
||||
* @param readCapacity read units
|
||||
* @param writeCapacity write units
|
||||
* <p>
|
||||
* Until the AWS SDK lets us switch a table to on-demand, an attempt to
|
||||
* set the I/O capacity to zero will fail.
|
||||
* @param readCapacity read units: must be greater than zero
|
||||
* @param writeCapacity write units: must be greater than zero
|
||||
* @throws IOException on a failure
|
||||
*/
|
||||
@Retries.RetryTranslated
|
||||
void provisionTable(Long readCapacity, Long writeCapacity)
|
||||
throws IOException {
|
||||
|
||||
if (readCapacity == 0 || writeCapacity == 0) {
|
||||
// table is pay on demand
|
||||
throw new IOException(E_ON_DEMAND_NO_SET_CAPACITY);
|
||||
}
|
||||
final ProvisionedThroughput toProvision = new ProvisionedThroughput()
|
||||
.withReadCapacityUnits(readCapacity)
|
||||
.withWriteCapacityUnits(writeCapacity);
|
||||
|
@ -434,7 +434,9 @@ static class Init extends S3GuardTool {
|
||||
"\n" +
|
||||
" URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
|
||||
" Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
|
||||
" is not supported.";
|
||||
" is not supported.\n"
|
||||
+ "To create a table with per-request billing, set the read and write\n"
|
||||
+ "capacities to 0";
|
||||
|
||||
Init(Configuration conf) {
|
||||
super(conf);
|
||||
|
@ -251,9 +251,11 @@ this sets the table name to `my-ddb-table-name`
|
||||
</property>
|
||||
```
|
||||
|
||||
It is good to share a table across multiple buckets for multiple reasons.
|
||||
It is good to share a table across multiple buckets for multiple reasons,
|
||||
especially if you are *not* using on-demand DynamoDB tables, and instead
|
||||
prepaying for provisioned I/O capacity.
|
||||
|
||||
1. You are billed for the I/O capacity allocated to the table,
|
||||
1. You are billed for the provisioned I/O capacity allocated to the table,
|
||||
*even when the table is not used*. Sharing capacity can reduce costs.
|
||||
|
||||
1. You can share the "provision burden" across the buckets. That is, rather
|
||||
@ -265,8 +267,13 @@ lower.
|
||||
S3Guard, because there is only one table to review and configure in the
|
||||
AWS management console.
|
||||
|
||||
1. When you don't grant the permission to create DynamoDB tables to users.
|
||||
A single pre-created table for all buckets avoids the needs for an administrator
|
||||
to create one for every bucket.
|
||||
|
||||
When wouldn't you want to share a table?
|
||||
|
||||
1. When you are using on-demand DynamoDB and want to keep each table isolated.
|
||||
1. When you do explicitly want to provision I/O capacity to a specific bucket
|
||||
and table, isolated from others.
|
||||
|
||||
@ -315,18 +322,25 @@ Next, you can choose whether or not the table will be automatically created
|
||||
</property>
|
||||
```
|
||||
|
||||
### 7. If creating a table: Set your DynamoDB I/O Capacity
|
||||
### 7. If creating a table: Choose your billing mode (and perhaps I/O Capacity)
|
||||
|
||||
Next, you need to set the DynamoDB read and write throughput requirements you
|
||||
expect to need for your cluster. Setting higher values will cost you more
|
||||
money. *Note* that these settings only affect table creation when
|
||||
Next, you need to decide whether to use On-Demand DynamoDB and its
|
||||
pay-per-request billing (recommended), or to explicitly request a
|
||||
provisioned IO capacity.
|
||||
|
||||
Before AWS offered pay-per-request billing, the sole billing mechanism
|
||||
was "provisioned capacity". This mechanism requires you to choose
|
||||
the DynamoDB read and write throughput requirements you
|
||||
expect to need for your expected uses of the S3Guard table.
|
||||
Setting higher values costs you more money — *even when the table is idle*.
|
||||
*Note* that these settings only affect table creation when
|
||||
`fs.s3a.s3guard.ddb.table.create` is enabled. To change the throughput for
|
||||
an existing table, use the AWS console or CLI tool.
|
||||
|
||||
For more details on DynamoDB capacity units, see the AWS page on [Capacity
|
||||
Unit Calculations](http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/WorkingWithTables.html#CapacityUnitCalculations).
|
||||
|
||||
The charges are incurred per hour for the life of the table, *even when the
|
||||
Provisioned IO capacity is billed per hour for the life of the table, *even when the
|
||||
table and the underlying S3 buckets are not being used*.
|
||||
|
||||
There are also charges incurred for data storage and for data I/O outside of the
|
||||
@ -334,34 +348,56 @@ region of the DynamoDB instance. S3Guard only stores metadata in DynamoDB: path
|
||||
and summary details of objects —the actual data is stored in S3, so billed at S3
|
||||
rates.
|
||||
|
||||
With provisioned I/O capacity, attempting to perform more I/O than the capacity
|
||||
requested throttles the operation and may result in operations failing.
|
||||
Larger I/O capacities cost more.
|
||||
|
||||
With the introduction of On-Demand DynamoDB, you can now avoid paying for
|
||||
provisioned capacity by creating an on-demand table.
|
||||
With an on-demand table you are not throttled if your DynamoDB requests exceed
|
||||
any pre-provisioned limit, nor do you pay per hour even when a table is idle.
|
||||
|
||||
You do, however, pay more per DynamoDB operation.
|
||||
Even so, the ability to cope with sudden bursts of read or write requests, combined
|
||||
with the elimination of charges for idle tables, suit the use patterns made of
|
||||
S3Guard tables by applications interacting with S3. That is: periods when the table
|
||||
is rarely used, with intermittent high-load operations when directory trees
|
||||
are scanned (query planning and similar), or updated (rename and delete operations).
|
||||
|
||||
|
||||
We recommend using On-Demand DynamoDB for maximum performance in operations
|
||||
such as query planning, and lowest cost when S3 buckets are not being accessed.
|
||||
|
||||
This is the default, as configured in the default configuration options.
|
||||
|
||||
```xml
|
||||
<property>
|
||||
<name>fs.s3a.s3guard.ddb.table.capacity.read</name>
|
||||
<value>500</value>
|
||||
<value>0</value>
|
||||
<description>
|
||||
Provisioned throughput requirements for read operations in terms of capacity
|
||||
units for the DynamoDB table. This config value will only be used when
|
||||
creating a new DynamoDB table, though later you can manually provision by
|
||||
increasing or decreasing read capacity as needed for existing tables.
|
||||
See DynamoDB documents for more information.
|
||||
units for the DynamoDB table. This config value will only be used when
|
||||
creating a new DynamoDB table.
|
||||
If set to 0 (the default), new tables are created with "per-request" capacity.
|
||||
If a positive integer is provided for this and the write capacity, then
|
||||
a table with "provisioned capacity" will be created.
|
||||
You can change the capacity of an existing provisioned-capacity table
|
||||
through the "s3guard set-capacity" command.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.s3a.s3guard.ddb.table.capacity.write</name>
|
||||
<value>100</value>
|
||||
<value>0</value>
|
||||
<description>
|
||||
Provisioned throughput requirements for write operations in terms of
|
||||
capacity units for the DynamoDB table. Refer to related config
|
||||
fs.s3a.s3guard.ddb.table.capacity.read before usage.
|
||||
capacity units for the DynamoDB table.
|
||||
If set to 0 (the default), new tables are created with "per-request" capacity.
|
||||
Refer to related configuration option fs.s3a.s3guard.ddb.table.capacity.read
|
||||
</description>
|
||||
</property>
|
||||
```
|
||||
|
||||
Attempting to perform more I/O than the capacity requested throttles the
|
||||
I/O, and may result in operations failing. Larger I/O capacities cost more.
|
||||
We recommend using small read and write capacities when initially experimenting
|
||||
with S3Guard, and considering DynamoDB On-Demand.
|
||||
|
||||
## Authenticating with S3Guard
|
||||
|
||||
@ -369,9 +405,7 @@ The DynamoDB metadata store takes advantage of the fact that the DynamoDB
|
||||
service uses the same authentication mechanisms as S3. S3Guard
|
||||
gets all its credentials from the S3A client that is using it.
|
||||
|
||||
All existing S3 authentication mechanisms can be used, except for one
|
||||
exception. Credentials placed in URIs are not supported for S3Guard, for security
|
||||
reasons.
|
||||
All existing S3 authentication mechanisms can be used.
|
||||
|
||||
## Per-bucket S3Guard configuration
|
||||
|
||||
@ -512,7 +546,13 @@ hadoop s3guard init -meta URI ( -region REGION | s3a://BUCKET )
|
||||
Creates and initializes an empty metadata store.
|
||||
|
||||
A DynamoDB metadata store can be initialized with additional parameters
|
||||
pertaining to [Provisioned Throughput](http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/HowItWorks.ProvisionedThroughput.html):
|
||||
pertaining to capacity.
|
||||
|
||||
If these values are both zero, then an on-demand DynamoDB table is created;
|
||||
if positive values then they set the
|
||||
[Provisioned Throughput](http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/HowItWorks.ProvisionedThroughput.html)
|
||||
of the table.
|
||||
|
||||
|
||||
```bash
|
||||
[-write PROVISIONED_WRITES] [-read PROVISIONED_READS]
|
||||
@ -528,29 +568,31 @@ metadata store will be created with these tags in DynamoDB.
|
||||
Example 1
|
||||
|
||||
```bash
|
||||
hadoop s3guard init -meta dynamodb://ireland-team -write 5 -read 10 s3a://ireland-1
|
||||
hadoop s3guard init -meta dynamodb://ireland-team -write 0 -read 0 s3a://ireland-1
|
||||
```
|
||||
|
||||
Creates a table "ireland-team" with a capacity of 5 for writes, 10 for reads,
|
||||
in the same location as the bucket "ireland-1".
|
||||
Creates an on-demand table "ireland-team",
|
||||
in the same location as the S3 bucket "ireland-1".
|
||||
|
||||
|
||||
Example 2
|
||||
|
||||
```bash
|
||||
hadoop s3guard init -meta dynamodb://ireland-team -region eu-west-1
|
||||
hadoop s3guard init -meta dynamodb://ireland-team -region eu-west-1 --read 0 --write 0
|
||||
```
|
||||
|
||||
Creates a table "ireland-team" in the region "eu-west-1.amazonaws.com"
|
||||
|
||||
|
||||
Example 3
|
||||
|
||||
```bash
|
||||
hadoop s3guard init -meta dynamodb://ireland-team -tag tag1=first;tag2=second;
|
||||
```
|
||||
|
||||
Creates a table "ireland-team" with tags "first" and "second".
|
||||
Creates a table "ireland-team" with tags "first" and "second". The read and
|
||||
write capacity will be those of the site configuration's values of
|
||||
`fs.s3a.s3guard.ddb.table.capacity.read` and `fs.s3a.s3guard.ddb.table.capacity.write`;
|
||||
if these are both zero then it will be an on-demand table.
|
||||
|
||||
### Import a bucket: `s3guard import`
|
||||
|
||||
@ -588,7 +630,7 @@ hadoop s3guard diff s3a://ireland-1
|
||||
Prints and optionally checks the s3guard and encryption status of a bucket.
|
||||
|
||||
```bash
|
||||
hadoop s3guard bucket-info [ -guarded ] [-unguarded] [-auth] [-nonauth] [-magic] [-encryption ENCRYPTION] s3a://BUCKET
|
||||
hadoop s3guard bucket-info [-guarded] [-unguarded] [-auth] [-nonauth] [-magic] [-encryption ENCRYPTION] s3a://BUCKET
|
||||
```
|
||||
|
||||
Options
|
||||
@ -788,7 +830,8 @@ the region "eu-west-1".
|
||||
|
||||
### Tune the I/O capacity of the DynamoDB Table, `s3guard set-capacity`
|
||||
|
||||
Alter the read and/or write capacity of a s3guard table.
|
||||
Alter the read and/or write capacity of a s3guard table created with provisioned
|
||||
I/O capacity.
|
||||
|
||||
```bash
|
||||
hadoop s3guard set-capacity [--read UNIT] [--write UNIT] ( -region REGION | s3a://BUCKET )
|
||||
@ -796,6 +839,9 @@ hadoop s3guard set-capacity [--read UNIT] [--write UNIT] ( -region REGION | s3a:
|
||||
|
||||
The `--read` and `--write` units are those of `s3guard init`.
|
||||
|
||||
It cannot be used to change the I/O capacity of an on demand table (there is
|
||||
no need), nor can it be used to convert an existing table to being
|
||||
on-demand. For that the AWS console must be used.
|
||||
|
||||
Example
|
||||
|
||||
@ -932,10 +978,10 @@ merits more testing before it could be considered reliable.
|
||||
|
||||
## Managing DynamoDB I/O Capacity
|
||||
|
||||
By default, DynamoDB is not only billed on use (data and I/O requests)
|
||||
-it is billed on allocated I/O Capacity.
|
||||
Historically, DynamoDB has been not only billed on use (data and I/O requests)
|
||||
-but on provisioned I/O Capacity.
|
||||
|
||||
When an application makes more requests than
|
||||
With Provisioned IO, when an application makes more requests than
|
||||
the allocated capacity permits, the request is rejected; it is up to
|
||||
the calling application to detect when it is being so throttled and
|
||||
react. S3Guard does this, but as a result: when the client is being
|
||||
@ -943,7 +989,7 @@ throttled, operations are slower. This capacity throttling is averaged
|
||||
over a few minutes: a briefly overloaded table will not be throttled,
|
||||
but the rate cannot be sustained.
|
||||
|
||||
The load on a table isvisible in the AWS console: go to the
|
||||
The load on a table is visible in the AWS console: go to the
|
||||
DynamoDB page for the table and select the "metrics" tab.
|
||||
If the graphs of throttled read or write
|
||||
requests show that a lot of throttling has taken place, then there is not
|
||||
@ -1015,20 +1061,33 @@ for S3Guard applications.
|
||||
* There's no explicit limit on I/O capacity, so operations which make
|
||||
heavy use of S3Guard tables (for example: SQL query planning) do not
|
||||
get throttled.
|
||||
* You are charged more per DynamoDB API call, in exchange for paying nothing
|
||||
when you are not interacting with DynamoDB.
|
||||
* There's no way to put a limit on the I/O; you may unintentionally run up
|
||||
large bills through sustained heavy load.
|
||||
* The `s3guard set-capacity` command fails: it does not make sense any more.
|
||||
|
||||
When idle, S3Guard tables are only billed for the data stored, not for
|
||||
any unused capacity. For this reason, there is no benefit from sharing
|
||||
a single S3Guard table across multiple buckets.
|
||||
any unused capacity. For this reason, there is no performance benefit
|
||||
from sharing a single S3Guard table across multiple buckets.
|
||||
|
||||
*Enabling DynamoDB On-Demand for a S3Guard table*
|
||||
*Creating a S3Guard Table with On-Demand Tables*
|
||||
|
||||
You cannot currently enable DynamoDB on-demand from the `s3guard` command
|
||||
when creating or updating a bucket.
|
||||
The default settings for S3Guard are to create on-demand tables; this
|
||||
can also be done explicitly in the `s3guard init` command by setting the
|
||||
read and write capacities to zero.
|
||||
|
||||
Instead it must be done through the AWS console or [the CLI](https://docs.aws.amazon.com/cli/latest/reference/dynamodb/update-table.html).
|
||||
|
||||
```bash
|
||||
hadoop s3guard init -meta dynamodb://ireland-team -write 0 -read 0 s3a://ireland-1
|
||||
```
|
||||
|
||||
*Enabling DynamoDB On-Demand for an existing S3Guard table*
|
||||
|
||||
You cannot currently convert an existing S3Guard table to being an on-demand
|
||||
table through the `s3guard` command.
|
||||
|
||||
It can be done through the AWS console or [the CLI](https://docs.aws.amazon.com/cli/latest/reference/dynamodb/update-table.html).
|
||||
From the Web console or the command line, switch the billing to pay-per-request.
|
||||
|
||||
Once enabled, the read and write capacities of the table listed in the
|
||||
@ -1078,7 +1137,7 @@ Metadata Store Diagnostics:
|
||||
The "magic" committer is supported
|
||||
```
|
||||
|
||||
### <a name="autoscaling"></a> Autoscaling S3Guard tables.
|
||||
### <a name="autoscaling"></a> Autoscaling (Provisioned Capacity) S3Guard tables.
|
||||
|
||||
[DynamoDB Auto Scaling](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/AutoScaling.html)
|
||||
can automatically increase and decrease the allocated capacity.
|
||||
@ -1093,7 +1152,7 @@ until any extra capacity is allocated. Furthermore, as this retrying will
|
||||
block the threads from performing other operations -including more I/O, the
|
||||
the autoscale may not scale fast enough.
|
||||
|
||||
This is why the DynamoDB On-Demand appears to be a better option for
|
||||
This is why DynamoDB On-Demand is a better option for
|
||||
workloads with Hadoop, Spark, Hive and other applications.
|
||||
|
||||
If autoscaling is to be used, we recommend experimenting with the option,
|
||||
@ -1259,18 +1318,18 @@ Error Code: ProvisionedThroughputExceededException;
|
||||
```
|
||||
The I/O load of clients of the (shared) DynamoDB table was exceeded.
|
||||
|
||||
1. Increase the capacity of the DynamoDB table.
|
||||
1. Increase the retry count and/or sleep time of S3Guard on throttle events.
|
||||
1. Enable capacity autoscaling for the table in the AWS console.
|
||||
1. Switch to On-Demand Dynamo DB tables (AWS console)
|
||||
1. Increase the capacity of the DynamoDB table (AWS console or `s3guard set-capacity`)/
|
||||
1. Increase the retry count and/or sleep time of S3Guard on throttle events (Hadoop configuration).
|
||||
|
||||
### Error `Max retries exceeded`
|
||||
|
||||
The I/O load of clients of the (shared) DynamoDB table was exceeded, and
|
||||
the number of attempts to retry the operation exceeded the configured amount.
|
||||
|
||||
1. Switch to On-Demand Dynamo DB tables (AWS console).
|
||||
1. Increase the capacity of the DynamoDB table.
|
||||
1. Increase the retry count and/or sleep time of S3Guard on throttle events.
|
||||
1. Enable capacity autoscaling for the table in the AWS console.
|
||||
|
||||
|
||||
### Error when running `set-capacity`: `org.apache.hadoop.fs.s3a.AWSServiceThrottledException: ProvisionTable`
|
||||
@ -1286,7 +1345,7 @@ Next decrease can be made at Wednesday, July 25, 2018 9:48:14 PM UTC
|
||||
```
|
||||
|
||||
There is a limit on how often you can change the capacity of a DynamoDB table;
|
||||
if you call set-capacity too often, it fails. Wait until the after the time indicated
|
||||
if you call `set-capacity` too often, it fails. Wait until after the time indicated
|
||||
and try again.
|
||||
|
||||
### Error `Invalid region specified`
|
||||
|
@ -197,4 +197,14 @@ public interface S3ATestConstants {
|
||||
Duration TEST_SESSION_TOKEN_DURATION = Duration.ofSeconds(
|
||||
TEST_SESSION_TOKEN_DURATION_SECONDS);
|
||||
|
||||
/**
|
||||
* Test table name to use during DynamoDB integration tests in
|
||||
* {@code ITestDynamoDBMetadataStore}.
|
||||
*
|
||||
* The table will be modified, and deleted in the end of the tests.
|
||||
* If this value is not set, the integration tests that would be destructive
|
||||
* won't run.
|
||||
*/
|
||||
String S3GUARD_DDB_TEST_TABLE_NAME_KEY =
|
||||
"fs.s3a.s3guard.ddb.test.table";
|
||||
}
|
||||
|
@ -59,7 +59,6 @@
|
||||
import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_TABLE_NAME_KEY;
|
||||
import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_METASTORE_NULL;
|
||||
import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL;
|
||||
import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestDynamoTablePrefix;
|
||||
import static org.apache.hadoop.fs.s3a.S3AUtils.clearBucketOption;
|
||||
import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.E_BAD_STATE;
|
||||
import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.SUCCESS;
|
||||
@ -332,7 +331,14 @@ public void testSetCapacityFailFastOnReadWriteOfZero() throws Exception{
|
||||
@Test
|
||||
public void testBucketInfoUnguarded() throws Exception {
|
||||
final Configuration conf = getConfiguration();
|
||||
URI fsUri = getFileSystem().getUri();
|
||||
conf.set(S3GUARD_DDB_TABLE_CREATE_KEY, Boolean.FALSE.toString());
|
||||
String bucket = fsUri.getHost();
|
||||
clearBucketOption(conf, bucket,
|
||||
S3GUARD_DDB_TABLE_CREATE_KEY);
|
||||
clearBucketOption(conf, bucket, S3_METADATA_STORE_IMPL);
|
||||
clearBucketOption(conf, bucket, S3GUARD_DDB_TABLE_NAME_KEY);
|
||||
conf.set(S3_METADATA_STORE_IMPL, S3GUARD_METASTORE_NULL);
|
||||
conf.set(S3GUARD_DDB_TABLE_NAME_KEY,
|
||||
"testBucketInfoUnguarded-" + UUID.randomUUID());
|
||||
|
||||
@ -341,7 +347,7 @@ public void testBucketInfoUnguarded() throws Exception {
|
||||
S3GuardTool.BucketInfo infocmd = new S3GuardTool.BucketInfo(conf);
|
||||
String info = exec(infocmd, S3GuardTool.BucketInfo.NAME,
|
||||
"-" + S3GuardTool.BucketInfo.UNGUARDED_FLAG,
|
||||
getFileSystem().getUri().toString());
|
||||
fsUri.toString());
|
||||
|
||||
assertTrue("Output should contain information about S3A client " + info,
|
||||
info.contains("S3A Client"));
|
||||
|
@ -21,10 +21,15 @@
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputDescription;
|
||||
import org.junit.Assert;
|
||||
|
||||
import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.READ_CAPACITY;
|
||||
import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.WRITE_CAPACITY;
|
||||
|
||||
/**
|
||||
* Tuple of read and write capacity of a DDB table.
|
||||
*/
|
||||
class DDBCapacities {
|
||||
private final long read, write;
|
||||
|
||||
@ -49,12 +54,6 @@ String getWriteStr() {
|
||||
return Long.toString(write);
|
||||
}
|
||||
|
||||
void checkEquals(String text, DDBCapacities that) throws Exception {
|
||||
if (!this.equals(that)) {
|
||||
throw new Exception(text + " expected = " + this +"; actual = "+ that);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) {
|
||||
@ -82,7 +81,7 @@ public String toString() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Is the the capacity that of a pay-on-demand table?
|
||||
* Is the the capacity that of an On-Demand table?
|
||||
* @return true if the capacities are both 0.
|
||||
*/
|
||||
public boolean isOnDemandTable() {
|
||||
@ -102,7 +101,19 @@ public static DDBCapacities extractCapacities(
|
||||
read);
|
||||
return new DDBCapacities(
|
||||
Long.parseLong(read),
|
||||
Long.parseLong(diagnostics.get(DynamoDBMetadataStore.WRITE_CAPACITY)));
|
||||
Long.parseLong(diagnostics.get(WRITE_CAPACITY)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a throughput information from table.describe(), build
|
||||
* a DDBCapacities object.
|
||||
* @param throughput throughput description.
|
||||
* @return the capacities
|
||||
*/
|
||||
public static DDBCapacities extractCapacities(
|
||||
ProvisionedThroughputDescription throughput) {
|
||||
return new DDBCapacities(throughput.getReadCapacityUnits(),
|
||||
throughput.getWriteCapacityUnits());
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -33,7 +33,6 @@
|
||||
import com.amazonaws.services.dynamodbv2.document.PrimaryKey;
|
||||
import com.amazonaws.services.dynamodbv2.document.Table;
|
||||
import com.amazonaws.services.dynamodbv2.model.ListTagsOfResourceRequest;
|
||||
import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputDescription;
|
||||
import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException;
|
||||
import com.amazonaws.services.dynamodbv2.model.TableDescription;
|
||||
|
||||
@ -43,6 +42,7 @@
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.hadoop.fs.contract.s3a.S3AContract;
|
||||
import org.apache.hadoop.fs.s3a.Constants;
|
||||
import org.apache.hadoop.fs.s3a.S3ATestConstants;
|
||||
import org.apache.hadoop.fs.s3a.Tristate;
|
||||
|
||||
import org.apache.hadoop.io.IOUtils;
|
||||
@ -64,6 +64,7 @@
|
||||
|
||||
import static org.apache.hadoop.fs.s3a.Constants.*;
|
||||
import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
|
||||
import static org.apache.hadoop.fs.s3a.S3AUtils.clearBucketOption;
|
||||
import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*;
|
||||
import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.*;
|
||||
import static org.apache.hadoop.test.LambdaTestUtils.*;
|
||||
@ -78,7 +79,15 @@
|
||||
*
|
||||
* According to the base class, every test case will have independent contract
|
||||
* to create a new {@link S3AFileSystem} instance and initializes it.
|
||||
* A table will be created and shared between the tests,
|
||||
* A table will be created and shared between the tests; some tests also
|
||||
* create their own.
|
||||
*
|
||||
* Important: Any new test which creates a table must do the following
|
||||
* <ol>
|
||||
* <li>Enable on-demand pricing.</li>
|
||||
* <li>Always destroy the table, even if an assertion fails.</li>
|
||||
* </ol>
|
||||
* This is needed to avoid "leaking" DDB tables and running up bills.
|
||||
*/
|
||||
public class ITestDynamoDBMetadataStore extends MetadataStoreTestBase {
|
||||
|
||||
@ -121,7 +130,7 @@ public void setUp() throws Exception {
|
||||
Assume.assumeTrue("Test DynamoDB table name should be set to run "
|
||||
+ "integration tests.", testDynamoDBTableName != null);
|
||||
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, testDynamoDBTableName);
|
||||
|
||||
enableOnDemand(conf);
|
||||
s3AContract = new S3AContract(conf);
|
||||
s3AContract.init();
|
||||
|
||||
@ -141,36 +150,40 @@ public void setUp() throws Exception {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClassSetup() throws IOException {
|
||||
Configuration conf = prepareTestConfiguration(new Configuration());
|
||||
assumeThatDynamoMetadataStoreImpl(conf);
|
||||
// S3GUARD_DDB_TEST_TABLE_NAME_KEY and S3GUARD_DDB_TABLE_NAME_KEY should
|
||||
// be configured to use this test.
|
||||
testDynamoDBTableName = conf.get(S3GUARD_DDB_TEST_TABLE_NAME_KEY);
|
||||
testDynamoDBTableName = conf.get(
|
||||
S3ATestConstants.S3GUARD_DDB_TEST_TABLE_NAME_KEY);
|
||||
String dynamoDbTableName = conf.getTrimmed(S3GUARD_DDB_TABLE_NAME_KEY);
|
||||
Assume.assumeTrue("No DynamoDB table name configured", !StringUtils
|
||||
.isEmpty(dynamoDbTableName));
|
||||
Assume.assumeTrue("No DynamoDB table name configured",
|
||||
!StringUtils.isEmpty(dynamoDbTableName));
|
||||
|
||||
// We should assert that the table name is configured, so the test should
|
||||
// fail if it's not configured.
|
||||
assertTrue("Test DynamoDB table name '"
|
||||
+ S3GUARD_DDB_TEST_TABLE_NAME_KEY + "' should be set to run "
|
||||
+ "integration tests.", testDynamoDBTableName != null);
|
||||
assertNotNull("Test DynamoDB table name '"
|
||||
+ S3ATestConstants.S3GUARD_DDB_TEST_TABLE_NAME_KEY + "'"
|
||||
+ " should be set to run integration tests.",
|
||||
testDynamoDBTableName);
|
||||
|
||||
// We should assert that the test table is not the same as the production
|
||||
// table, as the test table could be modified and destroyed multiple
|
||||
// times during the test.
|
||||
assertTrue("Test DynamoDB table name: '"
|
||||
+ S3GUARD_DDB_TEST_TABLE_NAME_KEY + "' and production table name: '"
|
||||
+ S3GUARD_DDB_TABLE_NAME_KEY + "' can not be the same.",
|
||||
!conf.get(S3GUARD_DDB_TABLE_NAME_KEY).equals(testDynamoDBTableName));
|
||||
assertNotEquals("Test DynamoDB table name: "
|
||||
+ "'" + S3ATestConstants.S3GUARD_DDB_TEST_TABLE_NAME_KEY + "'"
|
||||
+ " and production table name: "
|
||||
+ "'" + S3GUARD_DDB_TABLE_NAME_KEY + "' can not be the same.",
|
||||
testDynamoDBTableName, conf.get(S3GUARD_DDB_TABLE_NAME_KEY));
|
||||
|
||||
// We can use that table in the test if these assertions are valid
|
||||
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, testDynamoDBTableName);
|
||||
|
||||
LOG.debug("Creating static ddbms which will be shared between tests.");
|
||||
enableOnDemand(conf);
|
||||
|
||||
ddbmsStatic = new DynamoDBMetadataStore();
|
||||
ddbmsStatic.initialize(conf);
|
||||
}
|
||||
@ -198,18 +211,23 @@ private static void assumeThatDynamoMetadataStoreImpl(Configuration conf){
|
||||
|
||||
@Override
|
||||
public void tearDown() throws Exception {
|
||||
LOG.info("Removing data from ddbms table in teardown.");
|
||||
// The following is a way to be sure the table will be cleared and there
|
||||
// will be no leftovers after the test.
|
||||
PathMetadata meta = ddbmsStatic.get(strToPath("/"));
|
||||
if (meta != null){
|
||||
for (DescendantsIterator desc = new DescendantsIterator(ddbmsStatic, meta);
|
||||
desc.hasNext();) {
|
||||
ddbmsStatic.forgetMetadata(desc.next().getPath());
|
||||
try {
|
||||
if (ddbmsStatic != null) {
|
||||
LOG.info("Removing data from ddbms table in teardown.");
|
||||
// The following is a way to be sure the table will be cleared and there
|
||||
// will be no leftovers after the test.
|
||||
PathMetadata meta = ddbmsStatic.get(strToPath("/"));
|
||||
if (meta != null){
|
||||
for (DescendantsIterator desc =
|
||||
new DescendantsIterator(ddbmsStatic, meta);
|
||||
desc.hasNext();) {
|
||||
ddbmsStatic.forgetMetadata(desc.next().getPath());
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (IOException ignored) {
|
||||
}
|
||||
|
||||
fileSystem.close();
|
||||
IOUtils.cleanupWithLogger(LOG, fileSystem);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -263,6 +281,29 @@ private S3AFileSystem getFileSystem() {
|
||||
return this.fileSystem;
|
||||
}
|
||||
|
||||
/**
|
||||
* Force the configuration into DDB on demand, so that
|
||||
* even if a test bucket isn't cleaned up, the cost is $0.
|
||||
* @param conf configuration to patch.
|
||||
*/
|
||||
public static void enableOnDemand(Configuration conf) {
|
||||
conf.setInt(S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY, 0);
|
||||
conf.setInt(S3GUARD_DDB_TABLE_CAPACITY_READ_KEY, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the configuration needed to create a table; extracts
|
||||
* it from the filesystem then always patches it to be on demand.
|
||||
* Why the patch? It means even if a cached FS has brought in
|
||||
* some provisioned values, they get reset.
|
||||
* @return a new configuration
|
||||
*/
|
||||
private Configuration getTableCreationConfig() {
|
||||
Configuration conf = new Configuration(getFileSystem().getConf());
|
||||
enableOnDemand(conf);
|
||||
return conf;
|
||||
}
|
||||
|
||||
/**
|
||||
* This tests that after initialize() using an S3AFileSystem object, the
|
||||
* instance should have been initialized successfully, and tables are ACTIVE.
|
||||
@ -272,9 +313,11 @@ public void testInitialize() throws IOException {
|
||||
final S3AFileSystem s3afs = this.fileSystem;
|
||||
final String tableName =
|
||||
getTestTableName("testInitialize");
|
||||
final Configuration conf = s3afs.getConf();
|
||||
Configuration conf = getFileSystem().getConf();
|
||||
enableOnDemand(conf);
|
||||
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
|
||||
try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
|
||||
DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore();
|
||||
try {
|
||||
ddbms.initialize(s3afs);
|
||||
verifyTableInitialized(tableName, ddbms.getDynamoDB());
|
||||
assertNotNull(ddbms.getTable());
|
||||
@ -285,7 +328,9 @@ public void testInitialize() throws IOException {
|
||||
" region as S3 bucket",
|
||||
expectedRegion,
|
||||
ddbms.getRegion());
|
||||
} finally {
|
||||
ddbms.destroy();
|
||||
ddbms.close();
|
||||
}
|
||||
}
|
||||
|
||||
@ -297,7 +342,7 @@ public void testInitialize() throws IOException {
|
||||
public void testInitializeWithConfiguration() throws IOException {
|
||||
final String tableName =
|
||||
getTestTableName("testInitializeWithConfiguration");
|
||||
final Configuration conf = getFileSystem().getConf();
|
||||
final Configuration conf = getTableCreationConfig();
|
||||
conf.unset(S3GUARD_DDB_TABLE_NAME_KEY);
|
||||
String savedRegion = conf.get(S3GUARD_DDB_REGION_KEY,
|
||||
getFileSystem().getBucketLocation());
|
||||
@ -316,7 +361,8 @@ public void testInitializeWithConfiguration() throws IOException {
|
||||
}
|
||||
// config region
|
||||
conf.set(S3GUARD_DDB_REGION_KEY, savedRegion);
|
||||
try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
|
||||
DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore();
|
||||
try {
|
||||
ddbms.initialize(conf);
|
||||
verifyTableInitialized(tableName, ddbms.getDynamoDB());
|
||||
assertNotNull(ddbms.getTable());
|
||||
@ -324,7 +370,9 @@ public void testInitializeWithConfiguration() throws IOException {
|
||||
assertEquals("Unexpected key schema found!",
|
||||
keySchema(),
|
||||
ddbms.getTable().describe().getKeySchema());
|
||||
} finally {
|
||||
ddbms.destroy();
|
||||
ddbms.close();
|
||||
}
|
||||
}
|
||||
|
||||
@ -434,13 +482,14 @@ public void testItemLacksVersion() throws Throwable {
|
||||
@Test
|
||||
public void testTableVersionRequired() throws Exception {
|
||||
String tableName = getTestTableName("testTableVersionRequired");
|
||||
Configuration conf = getFileSystem().getConf();
|
||||
Configuration conf = getTableCreationConfig();
|
||||
int maxRetries = conf.getInt(S3GUARD_DDB_MAX_RETRIES,
|
||||
S3GUARD_DDB_MAX_RETRIES_DEFAULT);
|
||||
conf.setInt(S3GUARD_DDB_MAX_RETRIES, 3);
|
||||
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
|
||||
|
||||
try(DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
|
||||
DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore();
|
||||
try {
|
||||
ddbms.initialize(conf);
|
||||
Table table = verifyTableInitialized(tableName, ddbms.getDynamoDB());
|
||||
table.deleteItem(VERSION_MARKER_PRIMARY_KEY);
|
||||
@ -450,7 +499,9 @@ public void testTableVersionRequired() throws Exception {
|
||||
() -> ddbms.initTable());
|
||||
|
||||
conf.setInt(S3GUARD_DDB_MAX_RETRIES, maxRetries);
|
||||
} finally {
|
||||
ddbms.destroy();
|
||||
ddbms.close();
|
||||
}
|
||||
}
|
||||
|
||||
@ -461,10 +512,11 @@ public void testTableVersionRequired() throws Exception {
|
||||
@Test
|
||||
public void testTableVersionMismatch() throws Exception {
|
||||
String tableName = getTestTableName("testTableVersionMismatch");
|
||||
Configuration conf = getFileSystem().getConf();
|
||||
Configuration conf = getTableCreationConfig();
|
||||
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
|
||||
|
||||
try(DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
|
||||
DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore();
|
||||
try {
|
||||
ddbms.initialize(conf);
|
||||
Table table = verifyTableInitialized(tableName, ddbms.getDynamoDB());
|
||||
table.deleteItem(VERSION_MARKER_PRIMARY_KEY);
|
||||
@ -474,7 +526,9 @@ public void testTableVersionMismatch() throws Exception {
|
||||
// create existing table
|
||||
intercept(IOException.class, E_INCOMPATIBLE_VERSION,
|
||||
() -> ddbms.initTable());
|
||||
} finally {
|
||||
ddbms.destroy();
|
||||
ddbms.close();
|
||||
}
|
||||
}
|
||||
|
||||
@ -491,10 +545,18 @@ public void testFailNonexistentTable() throws IOException {
|
||||
getTestTableName("testFailNonexistentTable");
|
||||
final S3AFileSystem s3afs = getFileSystem();
|
||||
final Configuration conf = s3afs.getConf();
|
||||
enableOnDemand(conf);
|
||||
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
|
||||
String b = fsUri.getHost();
|
||||
clearBucketOption(conf, b, S3GUARD_DDB_TABLE_CREATE_KEY);
|
||||
clearBucketOption(conf, b, S3_METADATA_STORE_IMPL);
|
||||
clearBucketOption(conf, b, S3GUARD_DDB_TABLE_NAME_KEY);
|
||||
conf.unset(S3GUARD_DDB_TABLE_CREATE_KEY);
|
||||
try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
|
||||
ddbms.initialize(s3afs);
|
||||
// if an exception was not raised, a table was created.
|
||||
// So destroy it before failing.
|
||||
ddbms.destroy();
|
||||
fail("Should have failed as table does not exist and table auto-creation"
|
||||
+ " is disabled");
|
||||
} catch (IOException ignored) {
|
||||
@ -606,31 +668,36 @@ public void testMovePopulatesAncestors() throws IOException {
|
||||
public void testProvisionTable() throws Exception {
|
||||
final String tableName
|
||||
= getTestTableName("testProvisionTable-" + UUID.randomUUID());
|
||||
Configuration conf = getFileSystem().getConf();
|
||||
final Configuration conf = getTableCreationConfig();
|
||||
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
|
||||
|
||||
try(DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
|
||||
conf.setInt(S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY, 2);
|
||||
conf.setInt(S3GUARD_DDB_TABLE_CAPACITY_READ_KEY, 2);
|
||||
DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore();
|
||||
try {
|
||||
ddbms.initialize(conf);
|
||||
DynamoDB dynamoDB = ddbms.getDynamoDB();
|
||||
final ProvisionedThroughputDescription oldProvision =
|
||||
dynamoDB.getTable(tableName).describe().getProvisionedThroughput();
|
||||
ddbms.provisionTable(oldProvision.getReadCapacityUnits() * 2,
|
||||
oldProvision.getWriteCapacityUnits() * 2);
|
||||
final DDBCapacities oldProvision = DDBCapacities.extractCapacities(
|
||||
dynamoDB.getTable(tableName).describe().getProvisionedThroughput());
|
||||
Assume.assumeFalse("Table is on-demand", oldProvision.isOnDemandTable());
|
||||
long desiredReadCapacity = oldProvision.getRead() - 1;
|
||||
long desiredWriteCapacity = oldProvision.getWrite() - 1;
|
||||
ddbms.provisionTable(desiredReadCapacity,
|
||||
desiredWriteCapacity);
|
||||
ddbms.initTable();
|
||||
// we have to wait until the provisioning settings are applied,
|
||||
// so until the table is ACTIVE again and not in UPDATING
|
||||
ddbms.getTable().waitForActive();
|
||||
final ProvisionedThroughputDescription newProvision =
|
||||
dynamoDB.getTable(tableName).describe().getProvisionedThroughput();
|
||||
LOG.info("Old provision = {}, new provision = {}", oldProvision,
|
||||
newProvision);
|
||||
final DDBCapacities newProvision = DDBCapacities.extractCapacities(
|
||||
dynamoDB.getTable(tableName).describe().getProvisionedThroughput());
|
||||
assertEquals("Check newly provisioned table read capacity units.",
|
||||
oldProvision.getReadCapacityUnits() * 2,
|
||||
newProvision.getReadCapacityUnits().longValue());
|
||||
desiredReadCapacity,
|
||||
newProvision.getRead());
|
||||
assertEquals("Check newly provisioned table write capacity units.",
|
||||
oldProvision.getWriteCapacityUnits() * 2,
|
||||
newProvision.getWriteCapacityUnits().longValue());
|
||||
desiredWriteCapacity,
|
||||
newProvision.getWrite());
|
||||
} finally {
|
||||
ddbms.destroy();
|
||||
ddbms.close();
|
||||
}
|
||||
}
|
||||
|
||||
@ -639,9 +706,11 @@ public void testDeleteTable() throws Exception {
|
||||
final String tableName = getTestTableName("testDeleteTable");
|
||||
Path testPath = new Path(new Path(fsUri), "/" + tableName);
|
||||
final S3AFileSystem s3afs = getFileSystem();
|
||||
final Configuration conf = s3afs.getConf();
|
||||
final Configuration conf = getTableCreationConfig();
|
||||
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
|
||||
try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
|
||||
enableOnDemand(conf);
|
||||
DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore();
|
||||
try {
|
||||
ddbms.initialize(s3afs);
|
||||
// we can list the empty table
|
||||
ddbms.listChildren(testPath);
|
||||
@ -649,23 +718,22 @@ public void testDeleteTable() throws Exception {
|
||||
ddbms.destroy();
|
||||
verifyTableNotExist(tableName, dynamoDB);
|
||||
|
||||
// delete table once more; be ResourceNotFoundException swallowed silently
|
||||
// delete table once more; the ResourceNotFoundException swallowed
|
||||
// silently
|
||||
ddbms.destroy();
|
||||
verifyTableNotExist(tableName, dynamoDB);
|
||||
try {
|
||||
// we can no longer list the destroyed table
|
||||
ddbms.listChildren(testPath);
|
||||
fail("Should have failed after the table is destroyed!");
|
||||
} catch (IOException ignored) {
|
||||
}
|
||||
intercept(IOException.class, "",
|
||||
"Should have failed after the table is destroyed!",
|
||||
() -> ddbms.listChildren(testPath));
|
||||
} finally {
|
||||
ddbms.destroy();
|
||||
ddbms.close();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTableTagging() throws IOException {
|
||||
final Configuration conf = getFileSystem().getConf();
|
||||
|
||||
final Configuration conf = getTableCreationConfig();
|
||||
// clear all table tagging config before this test
|
||||
conf.getPropsWithPrefix(S3GUARD_DDB_TABLE_TAG).keySet().forEach(
|
||||
propKey -> conf.unset(S3GUARD_DDB_TABLE_TAG + propKey)
|
||||
@ -683,7 +751,8 @@ public void testTableTagging() throws IOException {
|
||||
conf.set(S3GUARD_DDB_TABLE_TAG + tagEntry.getKey(), tagEntry.getValue());
|
||||
}
|
||||
|
||||
try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
|
||||
DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore();
|
||||
try {
|
||||
ddbms.initialize(conf);
|
||||
assertNotNull(ddbms.getTable());
|
||||
assertEquals(tableName, ddbms.getTable().getTableName());
|
||||
@ -696,6 +765,9 @@ public void testTableTagging() throws IOException {
|
||||
for (Tag tag : tags) {
|
||||
Assert.assertEquals(tagMap.get(tag.getKey()), tag.getValue());
|
||||
}
|
||||
} finally {
|
||||
ddbms.destroy();
|
||||
ddbms.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -44,7 +44,6 @@
|
||||
import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_REGION_KEY;
|
||||
import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_TABLE_NAME_KEY;
|
||||
import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_TABLE_TAG;
|
||||
import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestDynamoTablePrefix;
|
||||
import static org.apache.hadoop.fs.s3a.S3AUtils.setBucketOption;
|
||||
import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.*;
|
||||
import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.*;
|
||||
@ -178,8 +177,8 @@ public void testDynamoDBInitDestroyCycle() throws Throwable {
|
||||
expectSuccess("Init command did not exit successfully - see output",
|
||||
initCmd,
|
||||
Init.NAME,
|
||||
"-" + READ_FLAG, "2",
|
||||
"-" + WRITE_FLAG, "2",
|
||||
"-" + READ_FLAG, "0",
|
||||
"-" + WRITE_FLAG, "0",
|
||||
"-" + META_FLAG, "dynamodb://" + testTableName,
|
||||
testS3Url);
|
||||
// Verify it exists
|
||||
@ -210,39 +209,21 @@ public void testDynamoDBInitDestroyCycle() throws Throwable {
|
||||
testS3Url);
|
||||
assertTrue("No Dynamo diagnostics in output " + info,
|
||||
info.contains(DESCRIPTION));
|
||||
assertTrue("No Dynamo diagnostics in output " + info,
|
||||
info.contains(DESCRIPTION));
|
||||
|
||||
// get the current values to set again
|
||||
|
||||
// play with the set-capacity option
|
||||
String fsURI = getFileSystem().getUri().toString();
|
||||
DDBCapacities original = getCapacities();
|
||||
String fsURI = getFileSystem().getUri().toString();
|
||||
if (!original.isOnDemandTable()) {
|
||||
// classic provisioned table
|
||||
assertTrue("Wrong billing mode in " + info,
|
||||
info.contains(BILLING_MODE_PROVISIONED));
|
||||
String capacityOut = exec(newSetCapacity(),
|
||||
SetCapacity.NAME,
|
||||
fsURI);
|
||||
LOG.info("Set Capacity output=\n{}", capacityOut);
|
||||
capacityOut = exec(newSetCapacity(),
|
||||
SetCapacity.NAME,
|
||||
"-" + READ_FLAG, original.getReadStr(),
|
||||
"-" + WRITE_FLAG, original.getWriteStr(),
|
||||
fsURI);
|
||||
LOG.info("Set Capacity output=\n{}", capacityOut);
|
||||
} else {
|
||||
// on demand table
|
||||
assertTrue("Wrong billing mode in " + info,
|
||||
info.contains(BILLING_MODE_PER_REQUEST));
|
||||
// on demand tables fail here, so expect that
|
||||
intercept(IOException.class, E_ON_DEMAND_NO_SET_CAPACITY,
|
||||
() -> exec(newSetCapacity(),
|
||||
SetCapacity.NAME,
|
||||
assertTrue("Wrong billing mode in " + info,
|
||||
info.contains(BILLING_MODE_PER_REQUEST));
|
||||
// per-request tables fail here, so expect that
|
||||
intercept(IOException.class, E_ON_DEMAND_NO_SET_CAPACITY,
|
||||
() -> exec(newSetCapacity(),
|
||||
SetCapacity.NAME,
|
||||
fsURI));
|
||||
}
|
||||
|
||||
// that call does not change the values
|
||||
original.checkEquals("unchanged", getCapacities());
|
||||
|
||||
// Destroy MetadataStore
|
||||
Destroy destroyCmd = new Destroy(fs.getConf());
|
||||
|
@ -44,7 +44,6 @@
|
||||
import org.apache.hadoop.io.IOUtils;
|
||||
import org.apache.hadoop.test.HadoopTestBase;
|
||||
|
||||
import static org.apache.hadoop.fs.s3a.S3ATestUtils.isMetadataStoreAuthoritative;
|
||||
import static org.apache.hadoop.fs.s3a.S3ATestUtils.metadataStorePersistsAuthoritativeBit;
|
||||
|
||||
/**
|
||||
|
Loading…
Reference in New Issue
Block a user