HADOOP-15563. S3Guard to support creating on-demand DDB tables.

Contributed by Steve Loughran

Change-Id: I2262b5b9f52e42ded8ed6f50fd39756f96e77087
Steve Loughran 2019-06-07 18:26:06 +01:00
parent 85479577da
commit 4e38dafde4
11 changed files with 375 additions and 193 deletions


@@ -1581,23 +1581,27 @@
 <property>
   <name>fs.s3a.s3guard.ddb.table.capacity.read</name>
-  <value>500</value>
+  <value>0</value>
   <description>
     Provisioned throughput requirements for read operations in terms of capacity
     units for the DynamoDB table. This config value will only be used when
-    creating a new DynamoDB table, though later you can manually provision by
-    increasing or decreasing read capacity as needed for existing tables.
-    See DynamoDB documents for more information.
+    creating a new DynamoDB table.
+    If set to 0 (the default), new tables are created with "per-request" capacity.
+    If a positive integer is provided for this and the write capacity, then
+    a table with "provisioned capacity" will be created.
+    You can change the capacity of an existing provisioned-capacity table
+    through the "s3guard set-capacity" command.
   </description>
 </property>

 <property>
   <name>fs.s3a.s3guard.ddb.table.capacity.write</name>
-  <value>100</value>
+  <value>0</value>
   <description>
     Provisioned throughput requirements for write operations in terms of
-    capacity units for the DynamoDB table. Refer to related config
-    fs.s3a.s3guard.ddb.table.capacity.read before usage.
+    capacity units for the DynamoDB table.
+    If set to 0 (the default), new tables are created with "per-request" capacity.
+    Refer to related configuration option fs.s3a.s3guard.ddb.table.capacity.read
   </description>
 </property>
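
The two capacity settings are only meaningful as a pair: both zero selects on-demand billing, both positive selects provisioned billing. Below is a minimal sketch of validating that invariant with the Hadoop `Configuration` API; the class and method names are invented for illustration and are not part of this patch:

```java
import org.apache.hadoop.conf.Configuration;

/** Sketch only: fail fast on an inconsistent capacity pair. */
public final class CapacityCheck {
  // Keys and zero defaults match the core-default.xml entries above.
  private static final String READ_KEY =
      "fs.s3a.s3guard.ddb.table.capacity.read";
  private static final String WRITE_KEY =
      "fs.s3a.s3guard.ddb.table.capacity.write";

  private CapacityCheck() {
  }

  public static void validateCapacities(Configuration conf) {
    long read = conf.getLong(READ_KEY, 0);
    long write = conf.getLong(WRITE_KEY, 0);
    // The only invalid combination: exactly one of the two is zero.
    if ((read == 0) != (write == 0)) {
      throw new IllegalArgumentException("Read capacity " + read
          + " and write capacity " + write + " are inconsistent:"
          + " both must be 0 (on-demand) or both positive (provisioned)");
    }
  }
}
```

This mirrors the precondition the patch adds to `DynamoDBMetadataStore.initTable()` below.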


@@ -439,7 +439,6 @@ private Constants() {
    * This config has no default value. If the user does not set this, the
    * S3Guard will operate table in the associated S3 bucket region.
    */
-  @InterfaceStability.Unstable
   public static final String S3GUARD_DDB_REGION_KEY =
       "fs.s3a.s3guard.ddb.region";

@@ -449,7 +448,6 @@ private Constants() {
    * This config has no default value. If the user does not set this, the
    * S3Guard implementation will use the respective S3 bucket name.
    */
-  @InterfaceStability.Unstable
   public static final String S3GUARD_DDB_TABLE_NAME_KEY =
       "fs.s3a.s3guard.ddb.table";

@@ -459,36 +457,45 @@ private Constants() {
    * For example:
    * fs.s3a.s3guard.ddb.table.tag.mytag
    */
-  @InterfaceStability.Unstable
   public static final String S3GUARD_DDB_TABLE_TAG =
       "fs.s3a.s3guard.ddb.table.tag.";

-  /**
-   * Test table name to use during DynamoDB integration test.
-   *
-   * The table will be modified, and deleted at the end of the tests.
-   * If this value is not set, the integration tests that would be destructive
-   * won't run.
-   */
-  @InterfaceStability.Unstable
-  public static final String S3GUARD_DDB_TEST_TABLE_NAME_KEY =
-      "fs.s3a.s3guard.ddb.test.table";
-
   /**
    * Whether to create the DynamoDB table if the table does not exist.
+   * Value: {@value}.
    */
-  @InterfaceStability.Unstable
   public static final String S3GUARD_DDB_TABLE_CREATE_KEY =
       "fs.s3a.s3guard.ddb.table.create";

-  @InterfaceStability.Unstable
+  /**
+   * Read capacity when creating a table.
+   * When it and the write capacity are both "0", a per-request table is
+   * created.
+   * Value: {@value}.
+   */
   public static final String S3GUARD_DDB_TABLE_CAPACITY_READ_KEY =
       "fs.s3a.s3guard.ddb.table.capacity.read";
-  public static final long S3GUARD_DDB_TABLE_CAPACITY_READ_DEFAULT = 500;

-  @InterfaceStability.Unstable
+  /**
+   * Default read capacity when creating a table.
+   * Value: {@value}.
+   */
+  public static final long S3GUARD_DDB_TABLE_CAPACITY_READ_DEFAULT = 0;
+
+  /**
+   * Write capacity when creating a table.
+   * When it and the read capacity are both "0", a per-request table is
+   * created.
+   * Value: {@value}.
+   */
   public static final String S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY =
       "fs.s3a.s3guard.ddb.table.capacity.write";
-  public static final long S3GUARD_DDB_TABLE_CAPACITY_WRITE_DEFAULT = 100;
+
+  /**
+   * Default write capacity when creating a table.
+   * Value: {@value}.
+   */
+  public static final long S3GUARD_DDB_TABLE_CAPACITY_WRITE_DEFAULT = 0;

   /**
    * The maximum put or delete requests per BatchWriteItem request.

@@ -497,7 +504,6 @@ private Constants() {
    */
   public static final int S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT = 25;

-  @InterfaceStability.Unstable
   public static final String S3GUARD_DDB_MAX_RETRIES =
       "fs.s3a.s3guard.ddb.max.retries";

@@ -509,7 +515,6 @@ private Constants() {
   public static final int S3GUARD_DDB_MAX_RETRIES_DEFAULT =
       DEFAULT_MAX_ERROR_RETRIES;

-  @InterfaceStability.Unstable
   public static final String S3GUARD_DDB_THROTTLE_RETRY_INTERVAL =
       "fs.s3a.s3guard.ddb.throttle.retry.interval";
   public static final String S3GUARD_DDB_THROTTLE_RETRY_INTERVAL_DEFAULT =

@@ -528,7 +533,6 @@ private Constants() {
   /**
    * The default "Null" metadata store: {@value}.
    */
-  @InterfaceStability.Unstable
   public static final String S3GUARD_METASTORE_NULL
       = "org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore";

@@ -561,7 +565,6 @@ private Constants() {
   /**
    * Use DynamoDB for the metadata: {@value}.
    */
-  @InterfaceStability.Unstable
   public static final String S3GUARD_METASTORE_DYNAMO
       = "org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore";


@@ -59,6 +59,7 @@
 import com.amazonaws.services.dynamodbv2.document.spec.GetItemSpec;
 import com.amazonaws.services.dynamodbv2.document.spec.QuerySpec;
 import com.amazonaws.services.dynamodbv2.document.utils.ValueMap;
+import com.amazonaws.services.dynamodbv2.model.BillingMode;
 import com.amazonaws.services.dynamodbv2.model.CreateTableRequest;
 import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughput;
 import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputDescription;

@@ -1259,11 +1260,26 @@ void initTable() throws IOException {
           tableName, region, (created != null) ? new Date(created) : null);
     } catch (ResourceNotFoundException rnfe) {
       if (conf.getBoolean(S3GUARD_DDB_TABLE_CREATE_KEY, false)) {
-        final ProvisionedThroughput capacity = new ProvisionedThroughput(
-            conf.getLong(S3GUARD_DDB_TABLE_CAPACITY_READ_KEY,
-                S3GUARD_DDB_TABLE_CAPACITY_READ_DEFAULT),
-            conf.getLong(S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY,
-                S3GUARD_DDB_TABLE_CAPACITY_WRITE_DEFAULT));
+        long readCapacity = conf.getLong(S3GUARD_DDB_TABLE_CAPACITY_READ_KEY,
+            S3GUARD_DDB_TABLE_CAPACITY_READ_DEFAULT);
+        long writeCapacity = conf.getLong(
+            S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY,
+            S3GUARD_DDB_TABLE_CAPACITY_WRITE_DEFAULT);
+        ProvisionedThroughput capacity;
+        if (readCapacity > 0 && writeCapacity > 0) {
+          capacity = new ProvisionedThroughput(
+              readCapacity,
+              writeCapacity);
+        } else {
+          // at least one capacity value is <= 0
+          // verify they are both exactly zero
+          Preconditions.checkArgument(
+              readCapacity == 0 && writeCapacity == 0,
+              "S3Guard table read capacity %d and write capacity %d"
+                  + " are inconsistent", readCapacity, writeCapacity);
+          // and set the capacity to null for per-request billing.
+          capacity = null;
+        }

         createTable(capacity);
       } else {

@@ -1403,20 +1419,31 @@ private void waitForTableActive(Table t) throws IOException {
    * marker.
    * Creating and setting up the table isn't wrapped by any retry operations;
    * the wait for a table to become available is RetryTranslated.
-   * @param capacity capacity to provision
+   * @param capacity capacity to provision. If null: create a per-request
+   * table.
    * @throws IOException on any failure.
    * @throws InterruptedIOException if the wait was interrupted
    */
   @Retries.OnceRaw
   private void createTable(ProvisionedThroughput capacity) throws IOException {
     try {
-      LOG.info("Creating non-existent DynamoDB table {} in region {}",
-          tableName, region);
-      table = dynamoDB.createTable(new CreateTableRequest()
+      String mode;
+      CreateTableRequest request = new CreateTableRequest()
           .withTableName(tableName)
           .withKeySchema(keySchema())
-          .withAttributeDefinitions(attributeDefinitions())
-          .withProvisionedThroughput(capacity));
+          .withAttributeDefinitions(attributeDefinitions());
+      if (capacity != null) {
+        mode = String.format("with provisioned read capacity %d and"
+                + " write capacity %d",
+            capacity.getReadCapacityUnits(), capacity.getWriteCapacityUnits());
+        request.withProvisionedThroughput(capacity);
+      } else {
+        mode = "with pay-per-request billing";
+        request.withBillingMode(BillingMode.PAY_PER_REQUEST);
+      }
+      LOG.info("Creating non-existent DynamoDB table {} in region {} {}",
+          tableName, region, mode);
+      table = dynamoDB.createTable(request);
       LOG.debug("Awaiting table becoming active");
     } catch (ResourceInUseException e) {
       LOG.warn("ResourceInUseException while creating DynamoDB table {} "

@@ -1446,13 +1473,21 @@ private PutItemOutcome putItem(Item item) {
    * Provision the table with given read and write capacity units.
    * Call will fail if the table is busy, or the new values match the current
    * ones.
-   * @param readCapacity read units
-   * @param writeCapacity write units
+   * <p>
+   * Until the AWS SDK lets us switch a table to on-demand, an attempt to
+   * set the I/O capacity to zero will fail.
+   * @param readCapacity read units: must be greater than zero
+   * @param writeCapacity write units: must be greater than zero
    * @throws IOException on a failure
    */
   @Retries.RetryTranslated
   void provisionTable(Long readCapacity, Long writeCapacity)
       throws IOException {
+
+    if (readCapacity == 0 || writeCapacity == 0) {
+      // table is pay on demand
+      throw new IOException(E_ON_DEMAND_NO_SET_CAPACITY);
+    }
     final ProvisionedThroughput toProvision = new ProvisionedThroughput()
         .withReadCapacityUnits(readCapacity)
         .withWriteCapacityUnits(writeCapacity);
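
For reference, here is a minimal standalone sketch of the same create-table pattern with the AWS SDK v1, outside the Hadoop codebase; the region, table name, and key schema are placeholders. The point it demonstrates: `withBillingMode(BillingMode.PAY_PER_REQUEST)` is supplied *instead of* `withProvisionedThroughput()`, never alongside it, which is exactly how `createTable()` above branches:

```java
import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;
import com.amazonaws.services.dynamodbv2.model.AttributeDefinition;
import com.amazonaws.services.dynamodbv2.model.BillingMode;
import com.amazonaws.services.dynamodbv2.model.CreateTableRequest;
import com.amazonaws.services.dynamodbv2.model.KeySchemaElement;
import com.amazonaws.services.dynamodbv2.model.KeyType;
import com.amazonaws.services.dynamodbv2.model.ScalarAttributeType;

public class OnDemandTableSketch {
  public static void main(String[] args) {
    AmazonDynamoDB client = AmazonDynamoDBClientBuilder.standard()
        .withRegion("eu-west-1")            // placeholder region
        .build();
    client.createTable(new CreateTableRequest()
        .withTableName("example-table")     // placeholder name
        .withAttributeDefinitions(
            new AttributeDefinition("id", ScalarAttributeType.S))
        .withKeySchema(new KeySchemaElement("id", KeyType.HASH))
        // on-demand: no ProvisionedThroughput in the request
        .withBillingMode(BillingMode.PAY_PER_REQUEST));
  }
}
```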


@@ -434,7 +434,9 @@ static class Init extends S3GuardTool {
         "\n" +
         " URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
         " Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
-        " is not supported.";
+        " is not supported.\n"
+        + "To create a table with per-request billing, set the read and write\n"
+        + "capacities to 0";

     Init(Configuration conf) {
       super(conf);


@@ -251,9 +251,11 @@ this sets the table name to `my-ddb-table-name`
 </property>
 ```

-It is good to share a table across multiple buckets for multiple reasons.
+It is good to share a table across multiple buckets for multiple reasons,
+especially if you are *not* using on-demand DynamoDB tables, and instead
+prepaying for provisioned I/O capacity.

-1. You are billed for the I/O capacity allocated to the table,
+1. You are billed for the provisioned I/O capacity allocated to the table,
 *even when the table is not used*. Sharing capacity can reduce costs.

 1. You can share the "provision burden" across the buckets. That is, rather

@@ -265,8 +267,13 @@ lower.
 S3Guard, because there is only one table to review and configure in the
 AWS management console.

+1. When you don't grant users the permission to create DynamoDB tables.
+A single pre-created table for all buckets avoids the need for an administrator
+to create one for every bucket.
+
 When wouldn't you want to share a table?

+1. When you are using on-demand DynamoDB and want to keep each table isolated.
 1. When you do explicitly want to provision I/O capacity to a specific bucket
 and table, isolated from others.

@@ -315,18 +322,25 @@ Next, you can choose whether or not the table will be automatically created
 </property>
 ```

-### 7. If creating a table: Set your DynamoDB I/O Capacity
+### 7. If creating a table: Choose your billing mode (and perhaps I/O Capacity)

-Next, you need to set the DynamoDB read and write throughput requirements you
-expect to need for your cluster. Setting higher values will cost you more
-money. *Note* that these settings only affect table creation when
+Next, you need to decide whether to use On-Demand DynamoDB and its
+pay-per-request billing (recommended), or to explicitly request a
+provisioned I/O capacity.
+
+Before AWS offered pay-per-request billing, the sole billing mechanism
+was "provisioned capacity". This mechanism requires you to choose
+the DynamoDB read and write throughput you expect to need for your
+use of the S3Guard table.
+Setting higher values costs you more money, *even when the table is idle*.
+*Note* that these settings only affect table creation when
 `fs.s3a.s3guard.ddb.table.create` is enabled. To change the throughput for
 an existing table, use the AWS console or CLI tool.

 For more details on DynamoDB capacity units, see the AWS page on [Capacity
 Unit Calculations](http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/WorkingWithTables.html#CapacityUnitCalculations).

-The charges are incurred per hour for the life of the table, *even when the
+Provisioned I/O capacity is billed per hour for the life of the table, *even when the
 table and the underlying S3 buckets are not being used*.

 There are also charges incurred for data storage and for data I/O outside of the

@@ -334,34 +348,56 @@ region of the DynamoDB instance. S3Guard only stores metadata in DynamoDB: path
 and summary details of objects —the actual data is stored in S3, so billed at S3
 rates.

+With provisioned I/O capacity, attempting to perform more I/O than the capacity
+requested throttles the operation and may result in operations failing.
+Larger I/O capacities cost more.
+
+With the introduction of On-Demand DynamoDB, you can now avoid paying for
+provisioned capacity by creating an on-demand table.
+With an on-demand table you are not throttled if your DynamoDB requests exceed
+any pre-provisioned limit, nor do you pay per hour even when a table is idle.
+
+You do, however, pay more per DynamoDB operation.
+Even so, the ability to cope with sudden bursts of read or write requests, combined
+with the elimination of charges for idle tables, suits the use patterns of
+S3Guard tables: periods when the table is rarely used, with intermittent
+high-load operations when directory trees are scanned (query planning and
+similar), or updated (rename and delete operations).
+
+We recommend using On-Demand DynamoDB for maximum performance in operations
+such as query planning, and lowest cost when S3 buckets are not being accessed.
+
+This is the default, as configured in the default configuration options.

 ```xml
 <property>
   <name>fs.s3a.s3guard.ddb.table.capacity.read</name>
-  <value>500</value>
+  <value>0</value>
   <description>
     Provisioned throughput requirements for read operations in terms of capacity
     units for the DynamoDB table. This config value will only be used when
-    creating a new DynamoDB table, though later you can manually provision by
-    increasing or decreasing read capacity as needed for existing tables.
-    See DynamoDB documents for more information.
+    creating a new DynamoDB table.
+    If set to 0 (the default), new tables are created with "per-request" capacity.
+    If a positive integer is provided for this and the write capacity, then
+    a table with "provisioned capacity" will be created.
+    You can change the capacity of an existing provisioned-capacity table
+    through the "s3guard set-capacity" command.
   </description>
 </property>

 <property>
   <name>fs.s3a.s3guard.ddb.table.capacity.write</name>
-  <value>100</value>
+  <value>0</value>
   <description>
     Provisioned throughput requirements for write operations in terms of
-    capacity units for the DynamoDB table. Refer to related config
-    fs.s3a.s3guard.ddb.table.capacity.read before usage.
+    capacity units for the DynamoDB table.
+    If set to 0 (the default), new tables are created with "per-request" capacity.
+    Refer to related configuration option fs.s3a.s3guard.ddb.table.capacity.read
   </description>
 </property>
 ```

-Attempting to perform more I/O than the capacity requested throttles the
-I/O, and may result in operations failing. Larger I/O capacities cost more.
+We recommend using small read and write capacities when initially experimenting
+with S3Guard, and considering DynamoDB On-Demand.
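
If you do want provisioned capacity for just one heavily used bucket while keeping the on-demand default elsewhere, S3A's per-bucket options (`fs.s3a.bucket.BUCKET.option`, covered under "Per-bucket S3Guard configuration" below) can express that. A sketch, reusing the `ireland-1` bucket from the examples in this document; the capacity values are illustrative only:

```java
import org.apache.hadoop.conf.Configuration;

public class PerBucketCapacitySketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Per-bucket overrides are remapped onto the base
    // fs.s3a.s3guard.ddb.table.capacity.* options when S3A binds
    // to s3a://ireland-1; other buckets keep the 0/0 default.
    conf.setLong(
        "fs.s3a.bucket.ireland-1.s3guard.ddb.table.capacity.read", 10);
    conf.setLong(
        "fs.s3a.bucket.ireland-1.s3guard.ddb.table.capacity.write", 5);
  }
}
```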
 ## Authenticating with S3Guard

@@ -369,9 +405,7 @@ The DynamoDB metadata store takes advantage of the fact that the DynamoDB
 service uses the same authentication mechanisms as S3. S3Guard
 gets all its credentials from the S3A client that is using it.

-All existing S3 authentication mechanisms can be used, except for one
-exception. Credentials placed in URIs are not supported for S3Guard, for security
-reasons.
+All existing S3 authentication mechanisms can be used.

 ## Per-bucket S3Guard configuration

@@ -512,7 +546,13 @@ hadoop s3guard init -meta URI ( -region REGION | s3a://BUCKET )

 Creates and initializes an empty metadata store.

 A DynamoDB metadata store can be initialized with additional parameters
-pertaining to [Provisioned Throughput](http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/HowItWorks.ProvisionedThroughput.html):
+pertaining to capacity.
+
+If these values are both zero, then an on-demand DynamoDB table is created;
+if they are positive, they set the
+[Provisioned Throughput](http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/HowItWorks.ProvisionedThroughput.html)
+of the table.

 ```bash
 [-write PROVISIONED_WRITES] [-read PROVISIONED_READS]

@@ -528,29 +568,31 @@ metadata store will be created with these tags in DynamoDB.

 Example 1

 ```bash
-hadoop s3guard init -meta dynamodb://ireland-team -write 5 -read 10 s3a://ireland-1
+hadoop s3guard init -meta dynamodb://ireland-team -write 0 -read 0 s3a://ireland-1
 ```

-Creates a table "ireland-team" with a capacity of 5 for writes, 10 for reads,
-in the same location as the bucket "ireland-1".
+Creates an on-demand table "ireland-team",
+in the same location as the S3 bucket "ireland-1".

 Example 2

 ```bash
-hadoop s3guard init -meta dynamodb://ireland-team -region eu-west-1
+hadoop s3guard init -meta dynamodb://ireland-team -region eu-west-1 --read 0 --write 0
 ```

 Creates a table "ireland-team" in the region "eu-west-1.amazonaws.com"

 Example 3

 ```bash
 hadoop s3guard init -meta dynamodb://ireland-team -tag tag1=first;tag2=second;
 ```

-Creates a table "ireland-team" with tags "first" and "second".
+Creates a table "ireland-team" with tags "first" and "second". The read and
+write capacity will be those of the site configuration's values of
+`fs.s3a.s3guard.ddb.table.capacity.read` and `fs.s3a.s3guard.ddb.table.capacity.write`;
+if these are both zero then it will be an on-demand table.

 ### Import a bucket: `s3guard import`

@@ -588,7 +630,7 @@ hadoop s3guard diff s3a://ireland-1

 Prints and optionally checks the s3guard and encryption status of a bucket.

 ```bash
-hadoop s3guard bucket-info [ -guarded ] [-unguarded] [-auth] [-nonauth] [-magic] [-encryption ENCRYPTION] s3a://BUCKET
+hadoop s3guard bucket-info [-guarded] [-unguarded] [-auth] [-nonauth] [-magic] [-encryption ENCRYPTION] s3a://BUCKET
 ```

 Options

@@ -788,7 +830,8 @@ the region "eu-west-1".

 ### Tune the I/O capacity of the DynamoDB Table, `s3guard set-capacity`

-Alter the read and/or write capacity of an S3Guard table.
+Alter the read and/or write capacity of an S3Guard table created with provisioned
+I/O capacity.

 ```bash
 hadoop s3guard set-capacity [--read UNIT] [--write UNIT] ( -region REGION | s3a://BUCKET )

@@ -796,6 +839,9 @@ hadoop s3guard set-capacity [--read UNIT] [--write UNIT] ( -region REGION | s3a:

 The `--read` and `--write` units are those of `s3guard init`.

+It cannot be used to change the I/O capacity of an on-demand table (there is
+no need), nor can it be used to convert an existing table to being
+on-demand. For that the AWS console must be used.

 Example

@@ -932,10 +978,10 @@ merits more testing before it could be considered reliable.

 ## Managing DynamoDB I/O Capacity

-By default, DynamoDB is not only billed on use (data and I/O requests)
--it is billed on allocated I/O Capacity.
+Historically, DynamoDB has been billed not only on use (data and I/O requests)
+but also on provisioned I/O capacity.

-When an application makes more requests than
+With provisioned I/O, when an application makes more requests than
 the allocated capacity permits, the request is rejected; it is up to
 the calling application to detect when it is being so throttled and
 react. S3Guard does this, but as a result: when the client is being

@@ -943,7 +989,7 @@ throttled, operations are slower. This capacity throttling is averaged
 over a few minutes: a briefly overloaded table will not be throttled,
 but the rate cannot be sustained.
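
How S3Guard reacts to throttling is tunable through the retry options declared in `Constants.java` earlier in this change. A sketch; the values `15` and `"500ms"` are illustrative only, and the interval seeds the sleep/backoff between retry attempts:

```java
import org.apache.hadoop.conf.Configuration;

public class ThrottleTuningSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // More attempts before a throttled operation fails the caller.
    conf.setInt("fs.s3a.s3guard.ddb.max.retries", 15);
    // Base interval between attempts, in Hadoop time-suffix syntax.
    conf.set("fs.s3a.s3guard.ddb.throttle.retry.interval", "500ms");
  }
}
```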
-The load on a table isvisible in the AWS console: go to the
+The load on a table is visible in the AWS console: go to the
 DynamoDB page for the table and select the "metrics" tab.
 If the graphs of throttled read or write
 requests show that a lot of throttling has taken place, then there is not

@@ -1015,20 +1061,33 @@ for S3Guard applications.
 * There's no explicit limit on I/O capacity, so operations which make
 heavy use of S3Guard tables (for example: SQL query planning) do not
 get throttled.
+* You are charged more per DynamoDB API call, in exchange for paying nothing
+when you are not interacting with DynamoDB.
 * There's no way to put a limit on the I/O; you may unintentionally run up
 large bills through sustained heavy load.
 * The `s3guard set-capacity` command fails: it does not make sense any more.

 When idle, S3Guard tables are only billed for the data stored, not for
-any unused capacity. For this reason, there is no benefit from sharing
-a single S3Guard table across multiple buckets.
+any unused capacity. For this reason, there is no performance benefit
+from sharing a single S3Guard table across multiple buckets.

-*Enabling DynamoDB On-Demand for a S3Guard table*
+*Creating a S3Guard Table with On-Demand Tables*

-You cannot currently enable DynamoDB on-demand from the `s3guard` command
-when creating or updating a bucket.
+The default settings for S3Guard are to create on-demand tables; this
+can also be done explicitly in the `s3guard init` command by setting the
+read and write capacities to zero.

-Instead it must be done through the AWS console or [the CLI](https://docs.aws.amazon.com/cli/latest/reference/dynamodb/update-table.html).
+```bash
+hadoop s3guard init -meta dynamodb://ireland-team -write 0 -read 0 s3a://ireland-1
+```
+
+*Enabling DynamoDB On-Demand for an existing S3Guard table*
+
+You cannot currently convert an existing S3Guard table to an on-demand
+table through the `s3guard` command.
+
+It can be done through the AWS console or [the CLI](https://docs.aws.amazon.com/cli/latest/reference/dynamodb/update-table.html).

 From the Web console or the command line, switch the billing to pay-per-request.
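
For anyone scripting the conversion rather than clicking through the console, here is a sketch of the SDK-v1 equivalent of the linked `update-table` call. It assumes `UpdateTableRequest` accepts a billing mode in the bundled SDK, as `CreateTableRequest` does in this patch; table name and region are the examples used earlier:

```java
import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;
import com.amazonaws.services.dynamodbv2.model.BillingMode;
import com.amazonaws.services.dynamodbv2.model.UpdateTableRequest;

public class SwitchToOnDemandSketch {
  public static void main(String[] args) {
    AmazonDynamoDB client = AmazonDynamoDBClientBuilder.standard()
        .withRegion("eu-west-1")       // region of the existing table
        .build();
    // Only the billing mode changes; key schema and data are untouched.
    client.updateTable(new UpdateTableRequest()
        .withTableName("ireland-team")
        .withBillingMode(BillingMode.PAY_PER_REQUEST));
  }
}
```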
 Once enabled, the read and write capacities of the table listed in the

@@ -1078,7 +1137,7 @@ Metadata Store Diagnostics:
 	The "magic" committer is supported
 ```

-### <a name="autoscaling"></a> Autoscaling S3Guard tables.
+### <a name="autoscaling"></a> Autoscaling (Provisioned Capacity) S3Guard tables.

 [DynamoDB Auto Scaling](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/AutoScaling.html)
 can automatically increase and decrease the allocated capacity.

@@ -1093,7 +1152,7 @@ until any extra capacity is allocated. Furthermore, as this retrying will
 block the threads from performing other operations, including more I/O;
 the autoscale may not scale fast enough.

-This is why the DynamoDB On-Demand appears to be a better option for
+This is why DynamoDB On-Demand is a better option for
 workloads with Hadoop, Spark, Hive and other applications.

 If autoscaling is to be used, we recommend experimenting with the option,

@@ -1259,18 +1318,18 @@ Error Code: ProvisionedThroughputExceededException;
 ```

 The I/O load of clients of the (shared) DynamoDB table was exceeded.

-1. Increase the capacity of the DynamoDB table.
-1. Increase the retry count and/or sleep time of S3Guard on throttle events.
-1. Enable capacity autoscaling for the table in the AWS console.
+1. Switch to on-demand DynamoDB tables (AWS console).
+1. Increase the capacity of the DynamoDB table (AWS console or `s3guard set-capacity`).
+1. Increase the retry count and/or sleep time of S3Guard on throttle events (Hadoop configuration).

 ### Error `Max retries exceeded`

 The I/O load of clients of the (shared) DynamoDB table was exceeded, and
 the number of attempts to retry the operation exceeded the configured amount.

+1. Switch to on-demand DynamoDB tables (AWS console).
 1. Increase the capacity of the DynamoDB table.
 1. Increase the retry count and/or sleep time of S3Guard on throttle events.
-1. Enable capacity autoscaling for the table in the AWS console.

 ### Error when running `set-capacity`: `org.apache.hadoop.fs.s3a.AWSServiceThrottledException: ProvisionTable`

@@ -1286,7 +1345,7 @@ Next decrease can be made at Wednesday, July 25, 2018 9:48:14 PM UTC
 ```

 There is a limit on how often you can change the capacity of a DynamoDB table;
-if you call set-capacity too often, it fails. Wait until after the time indicated
+if you call `set-capacity` too often, it fails. Wait until after the time indicated
 and try again.

 ### Error `Invalid region specified`


@@ -197,4 +197,14 @@ public interface S3ATestConstants {
   Duration TEST_SESSION_TOKEN_DURATION = Duration.ofSeconds(
       TEST_SESSION_TOKEN_DURATION_SECONDS);

+  /**
+   * Test table name to use during DynamoDB integration tests in
+   * {@code ITestDynamoDBMetadataStore}.
+   *
+   * The table will be modified, and deleted at the end of the tests.
+   * If this value is not set, the integration tests that would be destructive
+   * won't run.
+   */
+  String S3GUARD_DDB_TEST_TABLE_NAME_KEY =
+      "fs.s3a.s3guard.ddb.test.table";
 }


@@ -59,7 +59,6 @@
 import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_TABLE_NAME_KEY;
 import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_METASTORE_NULL;
 import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL;
-import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestDynamoTablePrefix;
 import static org.apache.hadoop.fs.s3a.S3AUtils.clearBucketOption;
 import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.E_BAD_STATE;
 import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.SUCCESS;

@@ -332,7 +331,14 @@ public void testSetCapacityFailFastOnReadWriteOfZero() throws Exception{
   @Test
   public void testBucketInfoUnguarded() throws Exception {
     final Configuration conf = getConfiguration();
+    URI fsUri = getFileSystem().getUri();
     conf.set(S3GUARD_DDB_TABLE_CREATE_KEY, Boolean.FALSE.toString());
+    String bucket = fsUri.getHost();
+    clearBucketOption(conf, bucket,
+        S3GUARD_DDB_TABLE_CREATE_KEY);
+    clearBucketOption(conf, bucket, S3_METADATA_STORE_IMPL);
+    clearBucketOption(conf, bucket, S3GUARD_DDB_TABLE_NAME_KEY);
+    conf.set(S3_METADATA_STORE_IMPL, S3GUARD_METASTORE_NULL);
     conf.set(S3GUARD_DDB_TABLE_NAME_KEY,
         "testBucketInfoUnguarded-" + UUID.randomUUID());

@@ -341,7 +347,7 @@ public void testBucketInfoUnguarded() throws Exception {
     S3GuardTool.BucketInfo infocmd = new S3GuardTool.BucketInfo(conf);
     String info = exec(infocmd, S3GuardTool.BucketInfo.NAME,
         "-" + S3GuardTool.BucketInfo.UNGUARDED_FLAG,
-        getFileSystem().getUri().toString());
+        fsUri.toString());

     assertTrue("Output should contain information about S3A client " + info,
         info.contains("S3A Client"));


@@ -21,10 +21,15 @@
 import java.util.Map;
 import java.util.Objects;

+import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputDescription;
 import org.junit.Assert;

 import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.READ_CAPACITY;
+import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.WRITE_CAPACITY;

+/**
+ * Tuple of read and write capacity of a DDB table.
+ */
 class DDBCapacities {
   private final long read, write;

@@ -49,12 +54,6 @@ String getWriteStr() {
     return Long.toString(write);
   }

-  void checkEquals(String text, DDBCapacities that) throws Exception {
-    if (!this.equals(that)) {
-      throw new Exception(text + " expected = " + this + "; actual = " + that);
-    }
-  }
-
   @Override
   public boolean equals(Object o) {
     if (this == o) {

@@ -82,7 +81,7 @@ public String toString() {
   }

   /**
-   * Is the capacity that of a pay-on-demand table?
+   * Is the capacity that of an On-Demand table?
    * @return true if the capacities are both 0.
    */
   public boolean isOnDemandTable() {

@@ -102,7 +101,19 @@ public static DDBCapacities extractCapacities(
         read);
     return new DDBCapacities(
         Long.parseLong(read),
-        Long.parseLong(diagnostics.get(DynamoDBMetadataStore.WRITE_CAPACITY)));
+        Long.parseLong(diagnostics.get(WRITE_CAPACITY)));
   }
+
+  /**
+   * Given the throughput information from table.describe(), build
+   * a DDBCapacities object.
+   * @param throughput throughput description.
+   * @return the capacities
+   */
+  public static DDBCapacities extractCapacities(
+      ProvisionedThroughputDescription throughput) {
+    return new DDBCapacities(throughput.getReadCapacityUnits(),
+        throughput.getWriteCapacityUnits());
+  }
 }
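
A short usage sketch for the new overload, as exercised by the provisioning test later in this change; it assumes a class in the same package holding a document-API `Table` handle, and the helper name is invented:

```java
import com.amazonaws.services.dynamodbv2.document.Table;

/** Sketch: probe a table's billing mode via the new overload. */
public final class BillingModeProbe {
  private BillingModeProbe() {
  }

  static boolean isOnDemand(Table table) {
    // On-demand tables report both capacities as 0 in their
    // ProvisionedThroughputDescription.
    return DDBCapacities.extractCapacities(
        table.describe().getProvisionedThroughput())
        .isOnDemandTable();
  }
}
```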


@ -33,7 +33,6 @@
import com.amazonaws.services.dynamodbv2.document.PrimaryKey; import com.amazonaws.services.dynamodbv2.document.PrimaryKey;
import com.amazonaws.services.dynamodbv2.document.Table; import com.amazonaws.services.dynamodbv2.document.Table;
import com.amazonaws.services.dynamodbv2.model.ListTagsOfResourceRequest; import com.amazonaws.services.dynamodbv2.model.ListTagsOfResourceRequest;
import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputDescription;
import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException; import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException;
import com.amazonaws.services.dynamodbv2.model.TableDescription; import com.amazonaws.services.dynamodbv2.model.TableDescription;
@ -43,6 +42,7 @@
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.contract.s3a.S3AContract; import org.apache.hadoop.fs.contract.s3a.S3AContract;
import org.apache.hadoop.fs.s3a.Constants; import org.apache.hadoop.fs.s3a.Constants;
import org.apache.hadoop.fs.s3a.S3ATestConstants;
import org.apache.hadoop.fs.s3a.Tristate; import org.apache.hadoop.fs.s3a.Tristate;
import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.IOUtils;
@ -64,6 +64,7 @@
import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
import static org.apache.hadoop.fs.s3a.S3AUtils.clearBucketOption;
import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*; import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*;
import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.*; import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.*;
import static org.apache.hadoop.test.LambdaTestUtils.*; import static org.apache.hadoop.test.LambdaTestUtils.*;
@ -78,7 +79,15 @@
* *
* According to the base class, every test case will have independent contract * According to the base class, every test case will have independent contract
* to create a new {@link S3AFileSystem} instance and initializes it. * to create a new {@link S3AFileSystem} instance and initializes it.
* A table will be created and shared between the tests, * A table will be created and shared between the tests; some tests also
* create their own.
*
* Important: Any new test which creates a table must do the following
* <ol>
* <li>Enable on-demand pricing.</li>
* <li>Always destroy the table, even if an assertion fails.</li>
* </ol>
* This is needed to avoid "leaking" DDB tables and running up bills.
*/ */
public class ITestDynamoDBMetadataStore extends MetadataStoreTestBase { public class ITestDynamoDBMetadataStore extends MetadataStoreTestBase {
@ -121,7 +130,7 @@ public void setUp() throws Exception {
Assume.assumeTrue("Test DynamoDB table name should be set to run " Assume.assumeTrue("Test DynamoDB table name should be set to run "
+ "integration tests.", testDynamoDBTableName != null); + "integration tests.", testDynamoDBTableName != null);
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, testDynamoDBTableName); conf.set(S3GUARD_DDB_TABLE_NAME_KEY, testDynamoDBTableName);
enableOnDemand(conf);
s3AContract = new S3AContract(conf); s3AContract = new S3AContract(conf);
s3AContract.init(); s3AContract.init();
@ -141,36 +150,40 @@ public void setUp() throws Exception {
} }
} }
@BeforeClass @BeforeClass
public static void beforeClassSetup() throws IOException { public static void beforeClassSetup() throws IOException {
Configuration conf = prepareTestConfiguration(new Configuration()); Configuration conf = prepareTestConfiguration(new Configuration());
assumeThatDynamoMetadataStoreImpl(conf); assumeThatDynamoMetadataStoreImpl(conf);
// S3GUARD_DDB_TEST_TABLE_NAME_KEY and S3GUARD_DDB_TABLE_NAME_KEY should // S3GUARD_DDB_TEST_TABLE_NAME_KEY and S3GUARD_DDB_TABLE_NAME_KEY should
// be configured to use this test. // be configured to use this test.
testDynamoDBTableName = conf.get(S3GUARD_DDB_TEST_TABLE_NAME_KEY); testDynamoDBTableName = conf.get(
S3ATestConstants.S3GUARD_DDB_TEST_TABLE_NAME_KEY);
String dynamoDbTableName = conf.getTrimmed(S3GUARD_DDB_TABLE_NAME_KEY); String dynamoDbTableName = conf.getTrimmed(S3GUARD_DDB_TABLE_NAME_KEY);
Assume.assumeTrue("No DynamoDB table name configured", !StringUtils Assume.assumeTrue("No DynamoDB table name configured",
.isEmpty(dynamoDbTableName)); !StringUtils.isEmpty(dynamoDbTableName));
// We should assert that the table name is configured, so the test should // We should assert that the table name is configured, so the test should
// fail if it's not configured. // fail if it's not configured.
assertTrue("Test DynamoDB table name '" assertNotNull("Test DynamoDB table name '"
+ S3GUARD_DDB_TEST_TABLE_NAME_KEY + "' should be set to run " + S3ATestConstants.S3GUARD_DDB_TEST_TABLE_NAME_KEY + "'"
+ "integration tests.", testDynamoDBTableName != null); + " should be set to run integration tests.",
testDynamoDBTableName);
// We should assert that the test table is not the same as the production // We should assert that the test table is not the same as the production
// table, as the test table could be modified and destroyed multiple // table, as the test table could be modified and destroyed multiple
// times during the test. // times during the test.
assertTrue("Test DynamoDB table name: '" assertNotEquals("Test DynamoDB table name: "
+ S3GUARD_DDB_TEST_TABLE_NAME_KEY + "' and production table name: '" + "'" + S3ATestConstants.S3GUARD_DDB_TEST_TABLE_NAME_KEY + "'"
+ S3GUARD_DDB_TABLE_NAME_KEY + "' can not be the same.", + " and production table name: "
!conf.get(S3GUARD_DDB_TABLE_NAME_KEY).equals(testDynamoDBTableName)); + "'" + S3GUARD_DDB_TABLE_NAME_KEY + "' can not be the same.",
testDynamoDBTableName, conf.get(S3GUARD_DDB_TABLE_NAME_KEY));
// We can use that table in the test if these assertions are valid // We can use that table in the test if these assertions are valid
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, testDynamoDBTableName); conf.set(S3GUARD_DDB_TABLE_NAME_KEY, testDynamoDBTableName);
LOG.debug("Creating static ddbms which will be shared between tests."); LOG.debug("Creating static ddbms which will be shared between tests.");
enableOnDemand(conf);
ddbmsStatic = new DynamoDBMetadataStore(); ddbmsStatic = new DynamoDBMetadataStore();
ddbmsStatic.initialize(conf); ddbmsStatic.initialize(conf);
} }
@ -198,18 +211,23 @@ private static void assumeThatDynamoMetadataStoreImpl(Configuration conf){
@Override @Override
public void tearDown() throws Exception { public void tearDown() throws Exception {
LOG.info("Removing data from ddbms table in teardown."); try {
// The following is a way to be sure the table will be cleared and there if (ddbmsStatic != null) {
// will be no leftovers after the test. LOG.info("Removing data from ddbms table in teardown.");
PathMetadata meta = ddbmsStatic.get(strToPath("/")); // The following is a way to be sure the table will be cleared and there
if (meta != null){ // will be no leftovers after the test.
for (DescendantsIterator desc = new DescendantsIterator(ddbmsStatic, meta); PathMetadata meta = ddbmsStatic.get(strToPath("/"));
desc.hasNext();) { if (meta != null){
ddbmsStatic.forgetMetadata(desc.next().getPath()); for (DescendantsIterator desc =
new DescendantsIterator(ddbmsStatic, meta);
desc.hasNext();) {
ddbmsStatic.forgetMetadata(desc.next().getPath());
}
}
} }
} catch (IOException ignored) {
} }
IOUtils.cleanupWithLogger(LOG, fileSystem);
fileSystem.close();
} }
/** /**
@ -263,6 +281,29 @@ private S3AFileSystem getFileSystem() {
return this.fileSystem; return this.fileSystem;
} }
/**
* Force the configuration into DDB on demand, so that
* even if a test bucket isn't cleaned up, the cost is $0.
* @param conf configuration to patch.
*/
public static void enableOnDemand(Configuration conf) {
conf.setInt(S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY, 0);
conf.setInt(S3GUARD_DDB_TABLE_CAPACITY_READ_KEY, 0);
}
/**
* Get the configuration needed to create a table; extracts
* it from the filesystem then always patches it to be on demand.
* Why the patch? It means even if a cached FS has brought in
* some provisioned values, they get reset.
* @return a new configuration
*/
private Configuration getTableCreationConfig() {
Configuration conf = new Configuration(getFileSystem().getConf());
enableOnDemand(conf);
return conf;
}
/** /**
* This tests that after initialize() using an S3AFileSystem object, the * This tests that after initialize() using an S3AFileSystem object, the
* instance should have been initialized successfully, and tables are ACTIVE. * instance should have been initialized successfully, and tables are ACTIVE.
@ -272,9 +313,11 @@ public void testInitialize() throws IOException {
final S3AFileSystem s3afs = this.fileSystem; final S3AFileSystem s3afs = this.fileSystem;
final String tableName = final String tableName =
getTestTableName("testInitialize"); getTestTableName("testInitialize");
final Configuration conf = s3afs.getConf(); Configuration conf = getFileSystem().getConf();
enableOnDemand(conf);
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName); conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) { DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore();
try {
ddbms.initialize(s3afs); ddbms.initialize(s3afs);
verifyTableInitialized(tableName, ddbms.getDynamoDB()); verifyTableInitialized(tableName, ddbms.getDynamoDB());
assertNotNull(ddbms.getTable()); assertNotNull(ddbms.getTable());
@ -285,7 +328,9 @@ public void testInitialize() throws IOException {
" region as S3 bucket", " region as S3 bucket",
expectedRegion, expectedRegion,
ddbms.getRegion()); ddbms.getRegion());
} finally {
ddbms.destroy(); ddbms.destroy();
ddbms.close();
} }
} }
@ -297,7 +342,7 @@ public void testInitialize() throws IOException {
public void testInitializeWithConfiguration() throws IOException { public void testInitializeWithConfiguration() throws IOException {
final String tableName = final String tableName =
getTestTableName("testInitializeWithConfiguration"); getTestTableName("testInitializeWithConfiguration");
final Configuration conf = getFileSystem().getConf(); final Configuration conf = getTableCreationConfig();
conf.unset(S3GUARD_DDB_TABLE_NAME_KEY); conf.unset(S3GUARD_DDB_TABLE_NAME_KEY);
String savedRegion = conf.get(S3GUARD_DDB_REGION_KEY, String savedRegion = conf.get(S3GUARD_DDB_REGION_KEY,
getFileSystem().getBucketLocation()); getFileSystem().getBucketLocation());
@ -316,7 +361,8 @@ public void testInitializeWithConfiguration() throws IOException {
} }
// config region // config region
conf.set(S3GUARD_DDB_REGION_KEY, savedRegion); conf.set(S3GUARD_DDB_REGION_KEY, savedRegion);
try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) { DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore();
try {
ddbms.initialize(conf); ddbms.initialize(conf);
verifyTableInitialized(tableName, ddbms.getDynamoDB()); verifyTableInitialized(tableName, ddbms.getDynamoDB());
assertNotNull(ddbms.getTable()); assertNotNull(ddbms.getTable());
@ -324,7 +370,9 @@ public void testInitializeWithConfiguration() throws IOException {
assertEquals("Unexpected key schema found!", assertEquals("Unexpected key schema found!",
keySchema(), keySchema(),
ddbms.getTable().describe().getKeySchema()); ddbms.getTable().describe().getKeySchema());
} finally {
ddbms.destroy(); ddbms.destroy();
ddbms.close();
} }
} }
@ -434,13 +482,14 @@ public void testItemLacksVersion() throws Throwable {
@Test @Test
public void testTableVersionRequired() throws Exception { public void testTableVersionRequired() throws Exception {
String tableName = getTestTableName("testTableVersionRequired"); String tableName = getTestTableName("testTableVersionRequired");
Configuration conf = getFileSystem().getConf(); Configuration conf = getTableCreationConfig();
int maxRetries = conf.getInt(S3GUARD_DDB_MAX_RETRIES, int maxRetries = conf.getInt(S3GUARD_DDB_MAX_RETRIES,
S3GUARD_DDB_MAX_RETRIES_DEFAULT); S3GUARD_DDB_MAX_RETRIES_DEFAULT);
conf.setInt(S3GUARD_DDB_MAX_RETRIES, 3); conf.setInt(S3GUARD_DDB_MAX_RETRIES, 3);
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName); conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
try(DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) { DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore();
try {
ddbms.initialize(conf); ddbms.initialize(conf);
Table table = verifyTableInitialized(tableName, ddbms.getDynamoDB()); Table table = verifyTableInitialized(tableName, ddbms.getDynamoDB());
table.deleteItem(VERSION_MARKER_PRIMARY_KEY); table.deleteItem(VERSION_MARKER_PRIMARY_KEY);
@ -450,7 +499,9 @@ public void testTableVersionRequired() throws Exception {
() -> ddbms.initTable()); () -> ddbms.initTable());
conf.setInt(S3GUARD_DDB_MAX_RETRIES, maxRetries); conf.setInt(S3GUARD_DDB_MAX_RETRIES, maxRetries);
} finally {
ddbms.destroy(); ddbms.destroy();
ddbms.close();
} }
} }
@ -461,10 +512,11 @@ public void testTableVersionRequired() throws Exception {
@Test @Test
public void testTableVersionMismatch() throws Exception { public void testTableVersionMismatch() throws Exception {
String tableName = getTestTableName("testTableVersionMismatch"); String tableName = getTestTableName("testTableVersionMismatch");
Configuration conf = getFileSystem().getConf(); Configuration conf = getTableCreationConfig();
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName); conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
try(DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) { DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore();
try {
ddbms.initialize(conf); ddbms.initialize(conf);
Table table = verifyTableInitialized(tableName, ddbms.getDynamoDB()); Table table = verifyTableInitialized(tableName, ddbms.getDynamoDB());
table.deleteItem(VERSION_MARKER_PRIMARY_KEY); table.deleteItem(VERSION_MARKER_PRIMARY_KEY);
@ -474,7 +526,9 @@ public void testTableVersionMismatch() throws Exception {
// create existing table // create existing table
intercept(IOException.class, E_INCOMPATIBLE_VERSION, intercept(IOException.class, E_INCOMPATIBLE_VERSION,
() -> ddbms.initTable()); () -> ddbms.initTable());
} finally {
ddbms.destroy(); ddbms.destroy();
ddbms.close();
} }
} }
@ -491,10 +545,18 @@ public void testFailNonexistentTable() throws IOException {
getTestTableName("testFailNonexistentTable"); getTestTableName("testFailNonexistentTable");
final S3AFileSystem s3afs = getFileSystem(); final S3AFileSystem s3afs = getFileSystem();
final Configuration conf = s3afs.getConf(); final Configuration conf = s3afs.getConf();
enableOnDemand(conf);
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName); conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
String b = fsUri.getHost();
clearBucketOption(conf, b, S3GUARD_DDB_TABLE_CREATE_KEY);
clearBucketOption(conf, b, S3_METADATA_STORE_IMPL);
clearBucketOption(conf, b, S3GUARD_DDB_TABLE_NAME_KEY);
conf.unset(S3GUARD_DDB_TABLE_CREATE_KEY); conf.unset(S3GUARD_DDB_TABLE_CREATE_KEY);
try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) { try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
ddbms.initialize(s3afs); ddbms.initialize(s3afs);
// if an exception was not raised, a table was created.
// So destroy it before failing.
ddbms.destroy();
fail("Should have failed as table does not exist and table auto-creation" fail("Should have failed as table does not exist and table auto-creation"
+ " is disabled"); + " is disabled");
} catch (IOException ignored) { } catch (IOException ignored) {
@ -606,31 +668,36 @@ public void testMovePopulatesAncestors() throws IOException {
   public void testProvisionTable() throws Exception {
     final String tableName
         = getTestTableName("testProvisionTable-" + UUID.randomUUID());
-    Configuration conf = getFileSystem().getConf();
+    final Configuration conf = getTableCreationConfig();
     conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
-    try(DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
+    conf.setInt(S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY, 2);
+    conf.setInt(S3GUARD_DDB_TABLE_CAPACITY_READ_KEY, 2);
+    DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore();
+    try {
       ddbms.initialize(conf);
       DynamoDB dynamoDB = ddbms.getDynamoDB();
-      final ProvisionedThroughputDescription oldProvision =
-          dynamoDB.getTable(tableName).describe().getProvisionedThroughput();
-      ddbms.provisionTable(oldProvision.getReadCapacityUnits() * 2,
-          oldProvision.getWriteCapacityUnits() * 2);
+      final DDBCapacities oldProvision = DDBCapacities.extractCapacities(
+          dynamoDB.getTable(tableName).describe().getProvisionedThroughput());
+      Assume.assumeFalse("Table is on-demand", oldProvision.isOnDemandTable());
+      long desiredReadCapacity = oldProvision.getRead() - 1;
+      long desiredWriteCapacity = oldProvision.getWrite() - 1;
+      ddbms.provisionTable(desiredReadCapacity,
+          desiredWriteCapacity);
       ddbms.initTable();
       // we have to wait until the provisioning settings are applied,
       // so until the table is ACTIVE again and not in UPDATING
       ddbms.getTable().waitForActive();
-      final ProvisionedThroughputDescription newProvision =
-          dynamoDB.getTable(tableName).describe().getProvisionedThroughput();
+      final DDBCapacities newProvision = DDBCapacities.extractCapacities(
+          dynamoDB.getTable(tableName).describe().getProvisionedThroughput());
+      LOG.info("Old provision = {}, new provision = {}", oldProvision,
+          newProvision);
       assertEquals("Check newly provisioned table read capacity units.",
-          oldProvision.getReadCapacityUnits() * 2,
-          newProvision.getReadCapacityUnits().longValue());
+          desiredReadCapacity,
+          newProvision.getRead());
       assertEquals("Check newly provisioned table write capacity units.",
-          oldProvision.getWriteCapacityUnits() * 2,
-          newProvision.getWriteCapacityUnits().longValue());
+          desiredWriteCapacity,
+          newProvision.getWrite());
+    } finally {
       ddbms.destroy();
+      ddbms.close();
     }
   }
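The Assume call above skips the provisioning test when the table is per-request. That check is possible because DynamoDB reports zero provisioned read and write units for on-demand tables; a minimal sketch of the classification (accessor names mirror the test's usage, the class layout itself is assumed):

    final class CapacitiesSketch {
      private final long read;
      private final long write;

      CapacitiesSketch(long read, long write) {
        this.read = read;
        this.write = write;
      }

      long getRead() { return read; }

      long getWrite() { return write; }

      boolean isOnDemandTable() {
        // on-demand tables carry no provisioned capacity on either axis
        return read == 0 && write == 0;
      }
    }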
@@ -639,9 +706,11 @@ public void testDeleteTable() throws Exception {
     final String tableName = getTestTableName("testDeleteTable");
     Path testPath = new Path(new Path(fsUri), "/" + tableName);
     final S3AFileSystem s3afs = getFileSystem();
-    final Configuration conf = s3afs.getConf();
+    final Configuration conf = getTableCreationConfig();
     conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
-    try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
+    enableOnDemand(conf);
+    DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore();
+    try {
       ddbms.initialize(s3afs);
       // we can list the empty table
       ddbms.listChildren(testPath);
@@ -649,23 +718,22 @@ public void testDeleteTable() throws Exception {
       ddbms.destroy();
       verifyTableNotExist(tableName, dynamoDB);
-      // delete table once more; be ResourceNotFoundException swallowed silently
+      // delete table once more; the ResourceNotFoundException is swallowed
+      // silently
       ddbms.destroy();
       verifyTableNotExist(tableName, dynamoDB);
-      try {
-        // we can no longer list the destroyed table
-        ddbms.listChildren(testPath);
-        fail("Should have failed after the table is destroyed!");
-      } catch (IOException ignored) {
-      }
+      intercept(IOException.class, "",
+          "Should have failed after the table is destroyed!",
+          () -> ddbms.listChildren(testPath));
+    } finally {
       ddbms.destroy();
+      ddbms.close();
     }
   }
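The intercept() helper from org.apache.hadoop.test.LambdaTestUtils replaces the old try/fail/catch block: it runs the closure, fails the test if no exception surfaces, and rethrows exceptions of an unexpected type. A simplified sketch of those semantics, not the real implementation:

    import java.util.concurrent.Callable;

    final class InterceptSketch {
      // Run eval; return the expected exception, fail on success,
      // propagate anything of the wrong type.
      static <E extends Exception> E intercept(Class<E> clazz,
          String message, Callable<?> eval) throws Exception {
        try {
          Object result = eval.call();
          throw new AssertionError(message + " - returned " + result);
        } catch (Exception e) {
          if (clazz.isInstance(e)) {
            return clazz.cast(e);
          }
          throw e;
        }
      }
    }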
   @Test
   public void testTableTagging() throws IOException {
-    final Configuration conf = getFileSystem().getConf();
+    final Configuration conf = getTableCreationConfig();
     // clear all table tagging config before this test
     conf.getPropsWithPrefix(S3GUARD_DDB_TABLE_TAG).keySet().forEach(
         propKey -> conf.unset(S3GUARD_DDB_TABLE_TAG + propKey)
@@ -683,7 +751,8 @@ public void testTableTagging() throws IOException {
       conf.set(S3GUARD_DDB_TABLE_TAG + tagEntry.getKey(), tagEntry.getValue());
     }
-    try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
+    DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore();
+    try {
       ddbms.initialize(conf);
       assertNotNull(ddbms.getTable());
       assertEquals(tableName, ddbms.getTable().getTableName());
@@ -696,6 +765,9 @@ public void testTableTagging() throws IOException {
       for (Tag tag : tags) {
         Assert.assertEquals(tagMap.get(tag.getKey()), tag.getValue());
       }
+    } finally {
+      ddbms.destroy();
+      ddbms.close();
     }
   }
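Each fs.s3a.s3guard.ddb.table.tag.KEY property set above is expected to surface as one table tag. A sketch of that mapping, assuming the AWS SDK v1 Tag model used elsewhere in this module; the enclosing method is illustrative:

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;
    import com.amazonaws.services.dynamodbv2.model.Tag;
    import org.apache.hadoop.conf.Configuration;

    final class TableTagSketch {
      static List<Tag> tagsFromConfig(Configuration conf) {
        // strip the prefix; what remains is the tag key
        Map<String, String> props =
            conf.getPropsWithPrefix("fs.s3a.s3guard.ddb.table.tag.");
        List<Tag> tags = new ArrayList<>();
        props.forEach((key, value) ->
            tags.add(new Tag().withKey(key).withValue(value)));
        return tags;
      }
    }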

View File

@@ -44,7 +44,6 @@
 import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_REGION_KEY;
 import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_TABLE_NAME_KEY;
 import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_TABLE_TAG;
-import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestDynamoTablePrefix;
 import static org.apache.hadoop.fs.s3a.S3AUtils.setBucketOption;
 import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.*;
 import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.*;
@@ -178,8 +177,8 @@ public void testDynamoDBInitDestroyCycle() throws Throwable {
expectSuccess("Init command did not exit successfully - see output", expectSuccess("Init command did not exit successfully - see output",
initCmd, initCmd,
Init.NAME, Init.NAME,
"-" + READ_FLAG, "2", "-" + READ_FLAG, "0",
"-" + WRITE_FLAG, "2", "-" + WRITE_FLAG, "0",
"-" + META_FLAG, "dynamodb://" + testTableName, "-" + META_FLAG, "dynamodb://" + testTableName,
testS3Url); testS3Url);
// Verify it exists // Verify it exists
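Passing 0 for both the read and write flags now asks init to create a per-request table. A sketch of the decision this implies at table-creation time, using the SDK v1 model classes; the surrounding request setup is assumed:

    import com.amazonaws.services.dynamodbv2.model.BillingMode;
    import com.amazonaws.services.dynamodbv2.model.CreateTableRequest;
    import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughput;

    final class BillingModeSketch {
      static CreateTableRequest applyCapacity(CreateTableRequest request,
          long read, long write) {
        if (read == 0 && write == 0) {
          // zero capacity on both axes selects on-demand billing
          return request.withBillingMode(BillingMode.PAY_PER_REQUEST);
        }
        return request.withBillingMode(BillingMode.PROVISIONED)
            .withProvisionedThroughput(new ProvisionedThroughput(read, write));
      }
    }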
@@ -210,39 +209,21 @@ public void testDynamoDBInitDestroyCycle() throws Throwable {
         testS3Url);
     assertTrue("No Dynamo diagnostics in output " + info,
         info.contains(DESCRIPTION));
-    assertTrue("No Dynamo diagnostics in output " + info,
-        info.contains(DESCRIPTION));
     // get the current values to set again
     // play with the set-capacity option
+    String fsURI = getFileSystem().getUri().toString();
     DDBCapacities original = getCapacities();
-    String fsURI = getFileSystem().getUri().toString();
-    if (!original.isOnDemandTable()) {
-      // classic provisioned table
-      assertTrue("Wrong billing mode in " + info,
-          info.contains(BILLING_MODE_PROVISIONED));
-      String capacityOut = exec(newSetCapacity(),
-          SetCapacity.NAME,
-          fsURI);
-      LOG.info("Set Capacity output=\n{}", capacityOut);
-      capacityOut = exec(newSetCapacity(),
-          SetCapacity.NAME,
-          "-" + READ_FLAG, original.getReadStr(),
-          "-" + WRITE_FLAG, original.getWriteStr(),
-          fsURI);
-      LOG.info("Set Capacity output=\n{}", capacityOut);
-    } else {
-      // on demand table
-      assertTrue("Wrong billing mode in " + info,
-          info.contains(BILLING_MODE_PER_REQUEST));
-      // on demand tables fail here, so expect that
-      intercept(IOException.class, E_ON_DEMAND_NO_SET_CAPACITY,
-          () -> exec(newSetCapacity(),
-              SetCapacity.NAME,
-              fsURI));
-    }
+    assertTrue("Wrong billing mode in " + info,
+        info.contains(BILLING_MODE_PER_REQUEST));
+    // per-request tables fail here, so expect that
+    intercept(IOException.class, E_ON_DEMAND_NO_SET_CAPACITY,
+        () -> exec(newSetCapacity(),
+            SetCapacity.NAME,
+            fsURI));
+    // that call does not change the values
+    original.checkEquals("unchanged", getCapacities());
     // Destroy MetadataStore
     Destroy destroyCmd = new Destroy(fs.getConf());
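Since the test tables are now created on-demand, set-capacity is expected to fail rather than reprovision. A sketch of the guard being exercised; the message text below is an assumption, only the constant's name comes from the test:

    import java.io.IOException;

    final class SetCapacityGuardSketch {
      // assumed wording; the test only matches on E_ON_DEMAND_NO_SET_CAPACITY
      static final String E_ON_DEMAND_NO_SET_CAPACITY =
          "cannot change the capacity of an on-demand table";

      static void checkCanSetCapacity(boolean onDemandTable) throws IOException {
        if (onDemandTable) {
          // per-request billing has no provisioned capacity to update
          throw new IOException(E_ON_DEMAND_NO_SET_CAPACITY);
        }
      }
    }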

View File

@@ -44,7 +44,6 @@
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.test.HadoopTestBase;
-import static org.apache.hadoop.fs.s3a.S3ATestUtils.isMetadataStoreAuthoritative;
 import static org.apache.hadoop.fs.s3a.S3ATestUtils.metadataStorePersistsAuthoritativeBit;
 /**