diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index 6edcb670a4..b5056d1d23 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -1581,23 +1581,27 @@ fs.s3a.s3guard.ddb.table.capacity.read - 500 + 0 Provisioned throughput requirements for read operations in terms of capacity - units for the DynamoDB table. This config value will only be used when - creating a new DynamoDB table, though later you can manually provision by - increasing or decreasing read capacity as needed for existing tables. - See DynamoDB documents for more information. + units for the DynamoDB table. This config value will only be used when + creating a new DynamoDB table. + If set to 0 (the default), new tables are created with "per-request" capacity. + If a positive integer is provided for this and the write capacity, then + a table with "provisioned capacity" will be created. + You can change the capacity of an existing provisioned-capacity table + through the "s3guard set-capacity" command. fs.s3a.s3guard.ddb.table.capacity.write - 100 + 0 Provisioned throughput requirements for write operations in terms of - capacity units for the DynamoDB table. Refer to related config - fs.s3a.s3guard.ddb.table.capacity.read before usage. + capacity units for the DynamoDB table. + If set to 0 (the default), new tables are created with "per-request" capacity. + Refer to related configuration option fs.s3a.s3guard.ddb.table.capacity.read diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 18ed7b4402..a8dc161e5e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -439,7 +439,6 @@ private Constants() { * This config has no default value. If the user does not set this, the * S3Guard will operate table in the associated S3 bucket region. */ - @InterfaceStability.Unstable public static final String S3GUARD_DDB_REGION_KEY = "fs.s3a.s3guard.ddb.region"; @@ -449,7 +448,6 @@ private Constants() { * This config has no default value. If the user does not set this, the * S3Guard implementation will use the respective S3 bucket name. */ - @InterfaceStability.Unstable public static final String S3GUARD_DDB_TABLE_NAME_KEY = "fs.s3a.s3guard.ddb.table"; @@ -459,36 +457,45 @@ private Constants() { * For example: * fs.s3a.s3guard.ddb.table.tag.mytag */ - @InterfaceStability.Unstable public static final String S3GUARD_DDB_TABLE_TAG = "fs.s3a.s3guard.ddb.table.tag."; - /** - * Test table name to use during DynamoDB integration test. - * - * The table will be modified, and deleted in the end of the tests. - * If this value is not set, the integration tests that would be destructive - * won't run. - */ - @InterfaceStability.Unstable - public static final String S3GUARD_DDB_TEST_TABLE_NAME_KEY = - "fs.s3a.s3guard.ddb.test.table"; - /** * Whether to create the DynamoDB table if the table does not exist. + * Value: {@value}. */ - @InterfaceStability.Unstable public static final String S3GUARD_DDB_TABLE_CREATE_KEY = "fs.s3a.s3guard.ddb.table.create"; - @InterfaceStability.Unstable + /** + * Read capacity when creating a table. 
+ * When it and the write capacity are both "0", a per-request table is + * created. + * Value: {@value}. + */ public static final String S3GUARD_DDB_TABLE_CAPACITY_READ_KEY = "fs.s3a.s3guard.ddb.table.capacity.read"; - public static final long S3GUARD_DDB_TABLE_CAPACITY_READ_DEFAULT = 500; - @InterfaceStability.Unstable + + /** + * Default read capacity when creating a table. + * Value: {@value}. + */ + public static final long S3GUARD_DDB_TABLE_CAPACITY_READ_DEFAULT = 0; + + /** + * Write capacity when creating a table. + * When it and the read capacity are both "0", a per-request table is + * created. + * Value: {@value}. + */ public static final String S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY = "fs.s3a.s3guard.ddb.table.capacity.write"; - public static final long S3GUARD_DDB_TABLE_CAPACITY_WRITE_DEFAULT = 100; + + /** + * Default write capacity when creating a table. + * Value: {@value}. + */ + public static final long S3GUARD_DDB_TABLE_CAPACITY_WRITE_DEFAULT = 0; /** * The maximum put or delete requests per BatchWriteItem request. @@ -497,7 +504,6 @@ private Constants() { */ public static final int S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT = 25; - @InterfaceStability.Unstable public static final String S3GUARD_DDB_MAX_RETRIES = "fs.s3a.s3guard.ddb.max.retries"; @@ -509,7 +515,6 @@ private Constants() { public static final int S3GUARD_DDB_MAX_RETRIES_DEFAULT = DEFAULT_MAX_ERROR_RETRIES; - @InterfaceStability.Unstable public static final String S3GUARD_DDB_THROTTLE_RETRY_INTERVAL = "fs.s3a.s3guard.ddb.throttle.retry.interval"; public static final String S3GUARD_DDB_THROTTLE_RETRY_INTERVAL_DEFAULT = @@ -528,7 +533,6 @@ private Constants() { /** * The default "Null" metadata store: {@value}. */ - @InterfaceStability.Unstable public static final String S3GUARD_METASTORE_NULL = "org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore"; @@ -561,7 +565,6 @@ private Constants() { /** * Use DynamoDB for the metadata: {@value}. */ - @InterfaceStability.Unstable public static final String S3GUARD_METASTORE_DYNAMO = "org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore"; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java index a9e1f33689..fa1a203fc7 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java @@ -59,6 +59,7 @@ import com.amazonaws.services.dynamodbv2.document.spec.GetItemSpec; import com.amazonaws.services.dynamodbv2.document.spec.QuerySpec; import com.amazonaws.services.dynamodbv2.document.utils.ValueMap; +import com.amazonaws.services.dynamodbv2.model.BillingMode; import com.amazonaws.services.dynamodbv2.model.CreateTableRequest; import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughput; import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputDescription; @@ -1259,11 +1260,26 @@ void initTable() throws IOException { tableName, region, (created != null) ? 
              new Date(created) : null);
     } catch (ResourceNotFoundException rnfe) {
       if (conf.getBoolean(S3GUARD_DDB_TABLE_CREATE_KEY, false)) {
-        final ProvisionedThroughput capacity = new ProvisionedThroughput(
-            conf.getLong(S3GUARD_DDB_TABLE_CAPACITY_READ_KEY,
-                S3GUARD_DDB_TABLE_CAPACITY_READ_DEFAULT),
-            conf.getLong(S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY,
-                S3GUARD_DDB_TABLE_CAPACITY_WRITE_DEFAULT));
+        long readCapacity = conf.getLong(S3GUARD_DDB_TABLE_CAPACITY_READ_KEY,
+            S3GUARD_DDB_TABLE_CAPACITY_READ_DEFAULT);
+        long writeCapacity = conf.getLong(
+            S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY,
+            S3GUARD_DDB_TABLE_CAPACITY_WRITE_DEFAULT);
+        ProvisionedThroughput capacity;
+        if (readCapacity > 0 && writeCapacity > 0) {
+          capacity = new ProvisionedThroughput(
+              readCapacity,
+              writeCapacity);
+        } else {
+          // at least one capacity value is <= 0
+          // verify they are both exactly zero
+          Preconditions.checkArgument(
+              readCapacity == 0 && writeCapacity == 0,
+              "S3Guard table read capacity %s and write capacity %s"
+                  + " are inconsistent", readCapacity, writeCapacity);
+          // and set the capacity to null for per-request billing.
+          capacity = null;
+        }
 
         createTable(capacity);
       } else {
@@ -1403,20 +1419,31 @@ private void waitForTableActive(Table t) throws IOException {
    * marker.
    * Creating and setting up the table isn't wrapped by any retry operations;
    * the wait for a table to become available is RetryTranslated.
-   * @param capacity capacity to provision
+   * @param capacity capacity to provision. If null: create a per-request
+   * table.
    * @throws IOException on any failure.
    * @throws InterruptedIOException if the wait was interrupted
    */
   @Retries.OnceRaw
   private void createTable(ProvisionedThroughput capacity) throws IOException {
     try {
-      LOG.info("Creating non-existent DynamoDB table {} in region {}",
-          tableName, region);
-      table = dynamoDB.createTable(new CreateTableRequest()
+      String mode;
+      CreateTableRequest request = new CreateTableRequest()
           .withTableName(tableName)
           .withKeySchema(keySchema())
-          .withAttributeDefinitions(attributeDefinitions())
-          .withProvisionedThroughput(capacity));
+          .withAttributeDefinitions(attributeDefinitions());
+      if (capacity != null) {
+        mode = String.format("with provisioned read capacity %d and"
+                + " write capacity %d",
+            capacity.getReadCapacityUnits(), capacity.getWriteCapacityUnits());
+        request.withProvisionedThroughput(capacity);
+      } else {
+        mode = "with pay-per-request billing";
+        request.withBillingMode(BillingMode.PAY_PER_REQUEST);
+      }
+      LOG.info("Creating non-existent DynamoDB table {} in region {} {}",
+          tableName, region, mode);
+      table = dynamoDB.createTable(request);
       LOG.debug("Awaiting table becoming active");
     } catch (ResourceInUseException e) {
       LOG.warn("ResourceInUseException while creating DynamoDB table {} "
@@ -1446,13 +1473,21 @@ private PutItemOutcome putItem(Item item) {
    * Provision the table with given read and write capacity units.
    * Call will fail if the table is busy, or the new values match the current
    * ones.
-   * @param readCapacity read units
-   * @param writeCapacity write units
+   * <p>
+   * Until the AWS SDK lets us switch a table to on-demand, an attempt to
+   * set the I/O capacity to zero will fail.
+   * @param readCapacity read units: must be greater than zero
+   * @param writeCapacity write units: must be greater than zero
    * @throws IOException on a failure
    */
   @Retries.RetryTranslated
   void provisionTable(Long readCapacity, Long writeCapacity)
       throws IOException {
+
+    if (readCapacity == 0 || writeCapacity == 0) {
+      // table is pay on demand
+      throw new IOException(E_ON_DEMAND_NO_SET_CAPACITY);
+    }
     final ProvisionedThroughput toProvision = new ProvisionedThroughput()
         .withReadCapacityUnits(readCapacity)
         .withWriteCapacityUnits(writeCapacity);
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
index 448ea9213f..397a9cba67 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
@@ -434,7 +434,9 @@ static class Init extends S3GuardTool {
         "\n" +
         "  URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
         "  Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
-        "  is not supported.";
+        "  is not supported.\n" +
+        "To create a table with per-request billing, set the read and write\n" +
+        "capacities to 0";
 
     Init(Configuration conf) {
       super(conf);
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md
index a766abc616..94dc89b70d 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md
@@ -251,9 +251,11 @@ this sets the table name to `my-ddb-table-name`
 ```
 
-It is good to share a table across multiple buckets for multiple reasons.
+It is good to share a table across multiple buckets for multiple reasons,
+especially if you are *not* using on-demand DynamoDB tables, and instead
+prepaying for provisioned I/O capacity.
 
-1. You are billed for the I/O capacity allocated to the table,
+1. You are billed for the provisioned I/O capacity allocated to the table,
 *even when the table is not used*. Sharing capacity can reduce costs.
 
 1. You can share the "provision burden" across the buckets. That is, rather
@@ -265,8 +267,13 @@ lower.
 S3Guard, because there is only one table to review and configure in the
 AWS management console.
 
+1. When you don't grant users the permission to create DynamoDB tables.
+A single pre-created table for all buckets avoids the need for an administrator
+to create one for every bucket.
+
 When wouldn't you want to share a table?
 
+1. When you are using on-demand DynamoDB and want to keep each table isolated.
 1. When you do explicitly want to provision I/O capacity to a specific bucket
 and table, isolated from others.
 
@@ -315,18 +322,25 @@ Next, you can choose whether or not the table will be automatically created
 ```
 
-### 7. If creating a table: Set your DynamoDB I/O Capacity
+### 7. If creating a table: Choose your billing mode (and perhaps I/O Capacity)
 
-Next, you need to set the DynamoDB read and write throughput requirements you
-expect to need for your cluster. Setting higher values will cost you more
-money. *Note* that these settings only affect table creation when
+Next, you need to decide whether to use On-Demand DynamoDB and its
+pay-per-request billing (recommended), or to explicitly request a
+provisioned IO capacity.
+
+Before AWS offered pay-per-request billing, the sole billing mechanism
+was "provisioned capacity". This mechanism requires you to choose
+the DynamoDB read and write throughput requirements you
+expect to need for your expected uses of the S3Guard table.
+Setting higher values costs you more money -*even when the table is idle*.
+*Note* that these settings only affect table creation when
 `fs.s3a.s3guard.ddb.table.create` is enabled. To change the throughput for
 an existing table, use the AWS console or CLI tool.
 
 For more details on DynamoDB capacity units, see the AWS page on [Capacity
 Unit Calculations](http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/WorkingWithTables.html#CapacityUnitCalculations).
 
-The charges are incurred per hour for the life of the table, *even when the
+Provisioned IO capacity is billed per hour for the life of the table, *even when the
 table and the underlying S3 buckets are not being used*.
 
 There are also charges incurred for data storage and for data I/O outside of the
@@ -334,34 +348,56 @@ region of the DynamoDB instance. S3Guard only stores metadata in
 DynamoDB: path and summary details of objects —the actual data is stored
 in S3, so billed at S3 rates.
 
+With provisioned I/O capacity, attempting to perform more I/O than the capacity
+requested throttles the operation and may result in operations failing.
+Larger I/O capacities cost more.
+
+With the introduction of On-Demand DynamoDB, you can now avoid paying for
+provisioned capacity by creating an on-demand table.
+With an on-demand table you are not throttled if your DynamoDB requests exceed
+any pre-provisioned limit, nor do you pay per hour even when a table is idle.
+
+You do, however, pay more per DynamoDB operation.
+Even so, the ability to cope with sudden bursts of read or write requests, combined
+with the elimination of charges for idle tables, suits the use patterns made of
+S3Guard tables by applications interacting with S3. That is: periods when the table
+is rarely used, with intermittent high-load operations when directory trees
+are scanned (query planning and similar) or updated (rename and delete operations).
+
+We recommend using On-Demand DynamoDB for maximum performance in operations
+such as query planning, and lowest cost when S3 buckets are not being accessed.
+
+This is the default, as set by the configuration options shown below.
 
 ```xml
 <property>
   <name>fs.s3a.s3guard.ddb.table.capacity.read</name>
-  <value>500</value>
+  <value>0</value>
   <description>
     Provisioned throughput requirements for read operations in terms of capacity
-    units for the DynamoDB table. This config value will only be used when
-    creating a new DynamoDB table, though later you can manually provision by
-    increasing or decreasing read capacity as needed for existing tables.
-    See DynamoDB documents for more information.
+    units for the DynamoDB table. This config value will only be used when
+    creating a new DynamoDB table.
+    If set to 0 (the default), new tables are created with "per-request" capacity.
+    If a positive integer is provided for this and the write capacity, then
+    a table with "provisioned capacity" will be created.
+    You can change the capacity of an existing provisioned-capacity table
+    through the "s3guard set-capacity" command.
fs.s3a.s3guard.ddb.table.capacity.write - 100 + 0 Provisioned throughput requirements for write operations in terms of - capacity units for the DynamoDB table. Refer to related config - fs.s3a.s3guard.ddb.table.capacity.read before usage. + capacity units for the DynamoDB table. + If set to 0 (the default), new tables are created with "per-request" capacity. + Refer to related configuration option fs.s3a.s3guard.ddb.table.capacity.read ``` -Attempting to perform more I/O than the capacity requested throttles the -I/O, and may result in operations failing. Larger I/O capacities cost more. -We recommending using small read and write capacities when initially experimenting -with S3Guard, and considering DynamoDB On-Demand. ## Authenticating with S3Guard @@ -369,9 +405,7 @@ The DynamoDB metadata store takes advantage of the fact that the DynamoDB service uses the same authentication mechanisms as S3. S3Guard gets all its credentials from the S3A client that is using it. -All existing S3 authentication mechanisms can be used, except for one -exception. Credentials placed in URIs are not supported for S3Guard, for security -reasons. +All existing S3 authentication mechanisms can be used. ## Per-bucket S3Guard configuration @@ -512,7 +546,13 @@ hadoop s3guard init -meta URI ( -region REGION | s3a://BUCKET ) Creates and initializes an empty metadata store. A DynamoDB metadata store can be initialized with additional parameters -pertaining to [Provisioned Throughput](http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/HowItWorks.ProvisionedThroughput.html): +pertaining to capacity. + +If these values are both zero, then an on-demand DynamoDB table is created; +if positive values then they set the +[Provisioned Throughput](http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/HowItWorks.ProvisionedThroughput.html) +of the table. + ```bash [-write PROVISIONED_WRITES] [-read PROVISIONED_READS] @@ -528,29 +568,31 @@ metadata store will be created with these tags in DynamoDB. Example 1 ```bash -hadoop s3guard init -meta dynamodb://ireland-team -write 5 -read 10 s3a://ireland-1 +hadoop s3guard init -meta dynamodb://ireland-team -write 0 -read 0 s3a://ireland-1 ``` -Creates a table "ireland-team" with a capacity of 5 for writes, 10 for reads, -in the same location as the bucket "ireland-1". +Creates an on-demand table "ireland-team", +in the same location as the S3 bucket "ireland-1". Example 2 ```bash -hadoop s3guard init -meta dynamodb://ireland-team -region eu-west-1 +hadoop s3guard init -meta dynamodb://ireland-team -region eu-west-1 --read 0 --write 0 ``` Creates a table "ireland-team" in the region "eu-west-1.amazonaws.com" - Example 3 ```bash hadoop s3guard init -meta dynamodb://ireland-team -tag tag1=first;tag2=second; ``` -Creates a table "ireland-team" with tags "first" and "second". +Creates a table "ireland-team" with tags "first" and "second". The read and +write capacity will be those of the site configuration's values of +`fs.s3a.s3guard.ddb.table.capacity.read` and `fs.s3a.s3guard.ddb.table.capacity.write`; +if these are both zero then it will be an on-demand table. ### Import a bucket: `s3guard import` @@ -588,7 +630,7 @@ hadoop s3guard diff s3a://ireland-1 Prints and optionally checks the s3guard and encryption status of a bucket. 
```bash
-hadoop s3guard bucket-info [ -guarded ] [-unguarded] [-auth] [-nonauth] [-magic] [-encryption ENCRYPTION] s3a://BUCKET
+hadoop s3guard bucket-info [-guarded] [-unguarded] [-auth] [-nonauth] [-magic] [-encryption ENCRYPTION] s3a://BUCKET
 ```
 
 Options
 
@@ -788,7 +830,8 @@ the region "eu-west-1".
 
 ### Tune the I/O capacity of the DynamoDB Table, `s3guard set-capacity`
 
-Alter the read and/or write capacity of a s3guard table.
+Alter the read and/or write capacity of an s3guard table created with provisioned
+I/O capacity.
 
 ```bash
 hadoop s3guard set-capacity [--read UNIT] [--write UNIT] ( -region REGION | s3a://BUCKET )
 ```
 
 The `--read` and `--write` units are those of `s3guard init`.
 
+It cannot be used to change the I/O capacity of an on-demand table (there is
+no need), nor can it be used to convert an existing table to
+on-demand billing. For that, the AWS console must be used.
 
 Example
 
@@ -932,10 +978,10 @@ merits more testing before it could be considered reliable.
 
 ## Managing DynamoDB I/O Capacity
 
-By default, DynamoDB is not only billed on use (data and I/O requests)
--it is billed on allocated I/O Capacity.
+Historically, DynamoDB has been billed not only on use (data and I/O requests)
+but also on provisioned I/O Capacity.
 
-When an application makes more requests than
+With Provisioned IO, when an application makes more requests than
 the allocated capacity permits, the request is rejected; it is up to
 the calling application to detect when it is being so throttled and
 react. S3Guard does this, but as a result: when the client is being
@@ -943,7 +989,7 @@ throttled, operations are slower. This capacity throttling is averaged
 over a few minutes: a briefly overloaded table will not be throttled,
 but the rate cannot be sustained.
 
-The load on a table isvisible in the AWS console: go to the
+The load on a table is visible in the AWS console: go to the
 DynamoDB page for the table and select the "metrics" tab.
 If the graphs of throttled read or write requests show that a lot of
 throttling has taken place, then there is not
@@ -1015,20 +1061,33 @@ for S3Guard applications.
 
 * There's no explicit limit on I/O capacity, so operations which make
 heavy use of S3Guard tables (for example: SQL query planning) do not
 get throttled.
+* You are charged more per DynamoDB API call, in exchange for paying nothing
+when you are not interacting with DynamoDB.
 * There's no way to put a limit on the I/O; you may unintentionally run up
 large bills through sustained heavy load.
 * The `s3guard set-capacity` command fails: it does not make sense any more.
 
 When idle, S3Guard tables are only billed for the data stored, not for
-any unused capacity. For this reason, there is no benefit from sharing
-a single S3Guard table across multiple buckets.
+any unused capacity. For this reason, there is no performance benefit
+from sharing a single S3Guard table across multiple buckets.
 
-*Enabling DynamoDB On-Demand for a S3Guard table*
+*Creating an On-Demand S3Guard Table*
 
-You cannot currently enable DynamoDB on-demand from the `s3guard` command
-when creating or updating a bucket.
+The default settings for S3Guard are to create on-demand tables; this
+can also be done explicitly in the `s3guard init` command by setting the
+read and write capacities to zero.
 
-Instead it must be done through the AWS console or [the CLI](https://docs.aws.amazon.com/cli/latest/reference/dynamodb/update-table.html).
+
+```bash
+hadoop s3guard init -meta dynamodb://ireland-team -write 0 -read 0 s3a://ireland-1
+```
+
+*Enabling DynamoDB On-Demand for an existing S3Guard table*
+
+You cannot currently convert an existing S3Guard table to being an on-demand
+table through the `s3guard` command.
+
+It can be done through the AWS console or [the CLI](https://docs.aws.amazon.com/cli/latest/reference/dynamodb/update-table.html).
 
 From the Web console or the command line, switch the billing to pay-per-request.
 
 Once enabled, the read and write capacities of the table listed in the
@@ -1078,7 +1137,7 @@ Metadata Store Diagnostics:
 The "magic" committer is supported
 ```
 
-### Autoscaling S3Guard tables.
+### Autoscaling (Provisioned Capacity) S3Guard tables.
 
 [DynamoDB Auto Scaling](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/AutoScaling.html)
 can automatically increase and decrease the allocated capacity.
@@ -1093,7 +1152,7 @@ until any extra capacity is allocated.
 Furthermore, as this retrying will block the threads from performing other
 operations -including more I/O, the autoscale may not scale fast enough.
 
-This is why the DynamoDB On-Demand appears to be a better option for
+This is why DynamoDB On-Demand is a better option for
 workloads with Hadoop, Spark, Hive and other applications.
 
 If autoscaling is to be used, we recommend experimenting with the option,
@@ -1259,18 +1318,18 @@ Error Code: ProvisionedThroughputExceededException;
 ```
 
 The I/O load of clients of the (shared) DynamoDB table was exceeded.
 
-1. Increase the capacity of the DynamoDB table.
-1. Increase the retry count and/or sleep time of S3Guard on throttle events.
-1. Enable capacity autoscaling for the table in the AWS console.
+1. Switch to On-Demand DynamoDB tables (AWS console).
+1. Increase the capacity of the DynamoDB table (AWS console or `s3guard set-capacity`).
+1. Increase the retry count and/or sleep time of S3Guard on throttle events (Hadoop configuration).
 
 ### Error `Max retries exceeded`
 
 The I/O load of clients of the (shared) DynamoDB table was exceeded, and
 the number of attempts to retry the operation exceeded the configured amount.
 
+1. Switch to On-Demand DynamoDB tables (AWS console).
 1. Increase the capacity of the DynamoDB table.
 1. Increase the retry count and/or sleep time of S3Guard on throttle events.
-1. Enable capacity autoscaling for the table in the AWS console.
 
 ### Error when running `set-capacity`: `org.apache.hadoop.fs.s3a.AWSServiceThrottledException: ProvisionTable`
 
 ```
 Next decrease can be made at Wednesday, July 25, 2018 9:48:14 PM UTC
 ```
 
 There is a limit on how often you can change the capacity of a DynamoDB table;
-if you call set-capacity too often, it fails. Wait until the after the time indicated
+if you call `set-capacity` too often, it fails. Wait until after the time indicated
 and try again.
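
Before retrying, it can help to confirm the table's billing mode and current
provisioned throughput. A sketch of how to do this with the AWS CLI; the table
name `ireland-team` and region `eu-west-1` are illustrative examples:

```bash
# Show the billing mode and the provisioned throughput of a table.
# For provisioned tables, NumberOfDecreasesToday and LastDecreaseDateTime
# indicate how close the table is to the daily decrease limit.
aws dynamodb describe-table \
  --table-name ireland-team \
  --region eu-west-1 \
  --query "Table.[BillingModeSummary.BillingMode, ProvisionedThroughput]"
```

If the billing mode is `PAY_PER_REQUEST`, the table is on-demand and
`set-capacity` will fail with the on-demand error described earlier, not a
throttling error. (The `BillingModeSummary` field may be absent on tables
which have always used provisioned capacity.)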
### Error `Invalid region specified`
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java
index 9d6e1ce00b..81db77c6e1 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java
@@ -197,4 +197,14 @@ public interface S3ATestConstants {
   Duration TEST_SESSION_TOKEN_DURATION = Duration.ofSeconds(
       TEST_SESSION_TOKEN_DURATION_SECONDS);
 
+  /**
+   * Test table name to use during DynamoDB integration tests in
+   * {@code ITestDynamoDBMetadataStore}.
+   *
+   * The table will be modified, and deleted at the end of the tests.
+   * If this value is not set, the integration tests that would be destructive
+   * won't run.
+   */
+  String S3GUARD_DDB_TEST_TABLE_NAME_KEY =
+      "fs.s3a.s3guard.ddb.test.table";
 }
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java
index 589628c5c9..9241686090 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java
@@ -59,7 +59,6 @@
 import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_TABLE_NAME_KEY;
 import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_METASTORE_NULL;
 import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL;
-import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestDynamoTablePrefix;
 import static org.apache.hadoop.fs.s3a.S3AUtils.clearBucketOption;
 import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.E_BAD_STATE;
 import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.SUCCESS;
@@ -332,7 +331,14 @@ public void testSetCapacityFailFastOnReadWriteOfZero() throws Exception{
 
   @Test
   public void testBucketInfoUnguarded() throws Exception {
     final Configuration conf = getConfiguration();
+    URI fsUri = getFileSystem().getUri();
     conf.set(S3GUARD_DDB_TABLE_CREATE_KEY, Boolean.FALSE.toString());
+    String bucket = fsUri.getHost();
+    clearBucketOption(conf, bucket,
+        S3GUARD_DDB_TABLE_CREATE_KEY);
+    clearBucketOption(conf, bucket, S3_METADATA_STORE_IMPL);
+    clearBucketOption(conf, bucket, S3GUARD_DDB_TABLE_NAME_KEY);
+
     conf.set(S3_METADATA_STORE_IMPL, S3GUARD_METASTORE_NULL);
     conf.set(S3GUARD_DDB_TABLE_NAME_KEY,
         "testBucketInfoUnguarded-" + UUID.randomUUID());
@@ -341,7 +347,7 @@ public void testBucketInfoUnguarded() throws Exception {
     S3GuardTool.BucketInfo infocmd = new S3GuardTool.BucketInfo(conf);
     String info = exec(infocmd, S3GuardTool.BucketInfo.NAME,
         "-" + S3GuardTool.BucketInfo.UNGUARDED_FLAG,
-        getFileSystem().getUri().toString());
+        fsUri.toString());
 
     assertTrue("Output should contain information about S3A client " + info,
         info.contains("S3A Client"));
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/DDBCapacities.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/DDBCapacities.java
index c6e47c7518..3f1e99061b 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/DDBCapacities.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/DDBCapacities.java
@@ -21,10 +21,15 @@
 import java.util.Map;
 import java.util.Objects;
 
+import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputDescription;
 import org.junit.Assert;
 
 import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.READ_CAPACITY;
+import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.WRITE_CAPACITY;
 
+/**
+ * Tuple of read and write capacity of a DDB table.
+ */
 class DDBCapacities {
   private final long read, write;
 
@@ -49,12 +54,6 @@ String getWriteStr() {
     return Long.toString(write);
   }
 
-  void checkEquals(String text, DDBCapacities that) throws Exception {
-    if (!this.equals(that)) {
-      throw new Exception(text + " expected = " + this +"; actual = "+ that);
-    }
-  }
-
   @Override
   public boolean equals(Object o) {
     if (this == o) {
@@ -82,7 +81,7 @@ public String toString() {
   }
 
   /**
-   * Is the the capacity that of a pay-on-demand table?
+   * Is the capacity that of an On-Demand table?
    * @return true if the capacities are both 0.
    */
   public boolean isOnDemandTable() {
@@ -102,7 +101,19 @@ public static DDBCapacities extractCapacities(
         read);
     return new DDBCapacities(
         Long.parseLong(read),
-        Long.parseLong(diagnostics.get(DynamoDBMetadataStore.WRITE_CAPACITY)));
+        Long.parseLong(diagnostics.get(WRITE_CAPACITY)));
+  }
+
+  /**
+   * Given the throughput information from table.describe(), build
+   * a DDBCapacities object.
+   * @param throughput throughput description.
+   * @return the capacities
+   */
+  public static DDBCapacities extractCapacities(
+      ProvisionedThroughputDescription throughput) {
+    return new DDBCapacities(throughput.getReadCapacityUnits(),
+        throughput.getWriteCapacityUnits());
   }
 }
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStore.java
index 972cbe5f5e..149d1f3606 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStore.java
@@ -33,7 +33,6 @@
 import com.amazonaws.services.dynamodbv2.document.PrimaryKey;
 import com.amazonaws.services.dynamodbv2.document.Table;
 import com.amazonaws.services.dynamodbv2.model.ListTagsOfResourceRequest;
-import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputDescription;
 import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException;
 import com.amazonaws.services.dynamodbv2.model.TableDescription;
 
@@ -43,6 +42,7 @@
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.fs.contract.s3a.S3AContract;
 import org.apache.hadoop.fs.s3a.Constants;
+import org.apache.hadoop.fs.s3a.S3ATestConstants;
 import org.apache.hadoop.fs.s3a.Tristate;
 import org.apache.hadoop.io.IOUtils;
 
@@ -64,6 +64,7 @@
 import static org.apache.hadoop.fs.s3a.Constants.*;
 import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
+import static org.apache.hadoop.fs.s3a.S3AUtils.clearBucketOption;
 import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*;
 import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.*;
 import static org.apache.hadoop.test.LambdaTestUtils.*;
@@ -78,7 +79,15 @@
  *
  * According to the base class, every test case will have an independent contract
  * to create a new {@link S3AFileSystem} instance and initialize it.
- * A table will be created and shared between the tests,
+ * A table will be created and shared between the tests; some tests also
+ * create their own.
+ *
+ * Important: Any new test which creates a table must do the following
+ * <ol>
+ *   <li>Enable on-demand pricing.</li>
+ *   <li>Always destroy the table, even if an assertion fails.</li>
+ * </ol>
+ * This is needed to avoid "leaking" DDB tables and running up bills. */ public class ITestDynamoDBMetadataStore extends MetadataStoreTestBase { @@ -121,7 +130,7 @@ public void setUp() throws Exception { Assume.assumeTrue("Test DynamoDB table name should be set to run " + "integration tests.", testDynamoDBTableName != null); conf.set(S3GUARD_DDB_TABLE_NAME_KEY, testDynamoDBTableName); - + enableOnDemand(conf); s3AContract = new S3AContract(conf); s3AContract.init(); @@ -141,36 +150,40 @@ public void setUp() throws Exception { } } - @BeforeClass public static void beforeClassSetup() throws IOException { Configuration conf = prepareTestConfiguration(new Configuration()); assumeThatDynamoMetadataStoreImpl(conf); // S3GUARD_DDB_TEST_TABLE_NAME_KEY and S3GUARD_DDB_TABLE_NAME_KEY should // be configured to use this test. - testDynamoDBTableName = conf.get(S3GUARD_DDB_TEST_TABLE_NAME_KEY); + testDynamoDBTableName = conf.get( + S3ATestConstants.S3GUARD_DDB_TEST_TABLE_NAME_KEY); String dynamoDbTableName = conf.getTrimmed(S3GUARD_DDB_TABLE_NAME_KEY); - Assume.assumeTrue("No DynamoDB table name configured", !StringUtils - .isEmpty(dynamoDbTableName)); + Assume.assumeTrue("No DynamoDB table name configured", + !StringUtils.isEmpty(dynamoDbTableName)); // We should assert that the table name is configured, so the test should // fail if it's not configured. - assertTrue("Test DynamoDB table name '" - + S3GUARD_DDB_TEST_TABLE_NAME_KEY + "' should be set to run " - + "integration tests.", testDynamoDBTableName != null); + assertNotNull("Test DynamoDB table name '" + + S3ATestConstants.S3GUARD_DDB_TEST_TABLE_NAME_KEY + "'" + + " should be set to run integration tests.", + testDynamoDBTableName); // We should assert that the test table is not the same as the production // table, as the test table could be modified and destroyed multiple // times during the test. - assertTrue("Test DynamoDB table name: '" - + S3GUARD_DDB_TEST_TABLE_NAME_KEY + "' and production table name: '" - + S3GUARD_DDB_TABLE_NAME_KEY + "' can not be the same.", - !conf.get(S3GUARD_DDB_TABLE_NAME_KEY).equals(testDynamoDBTableName)); + assertNotEquals("Test DynamoDB table name: " + + "'" + S3ATestConstants.S3GUARD_DDB_TEST_TABLE_NAME_KEY + "'" + + " and production table name: " + + "'" + S3GUARD_DDB_TABLE_NAME_KEY + "' can not be the same.", + testDynamoDBTableName, conf.get(S3GUARD_DDB_TABLE_NAME_KEY)); // We can use that table in the test if these assertions are valid conf.set(S3GUARD_DDB_TABLE_NAME_KEY, testDynamoDBTableName); LOG.debug("Creating static ddbms which will be shared between tests."); + enableOnDemand(conf); + ddbmsStatic = new DynamoDBMetadataStore(); ddbmsStatic.initialize(conf); } @@ -198,18 +211,23 @@ private static void assumeThatDynamoMetadataStoreImpl(Configuration conf){ @Override public void tearDown() throws Exception { - LOG.info("Removing data from ddbms table in teardown."); - // The following is a way to be sure the table will be cleared and there - // will be no leftovers after the test. - PathMetadata meta = ddbmsStatic.get(strToPath("/")); - if (meta != null){ - for (DescendantsIterator desc = new DescendantsIterator(ddbmsStatic, meta); - desc.hasNext();) { - ddbmsStatic.forgetMetadata(desc.next().getPath()); + try { + if (ddbmsStatic != null) { + LOG.info("Removing data from ddbms table in teardown."); + // The following is a way to be sure the table will be cleared and there + // will be no leftovers after the test. 
+ PathMetadata meta = ddbmsStatic.get(strToPath("/")); + if (meta != null){ + for (DescendantsIterator desc = + new DescendantsIterator(ddbmsStatic, meta); + desc.hasNext();) { + ddbmsStatic.forgetMetadata(desc.next().getPath()); + } + } } + } catch (IOException ignored) { } - - fileSystem.close(); + IOUtils.cleanupWithLogger(LOG, fileSystem); } /** @@ -263,6 +281,29 @@ private S3AFileSystem getFileSystem() { return this.fileSystem; } + /** + * Force the configuration into DDB on demand, so that + * even if a test bucket isn't cleaned up, the cost is $0. + * @param conf configuration to patch. + */ + public static void enableOnDemand(Configuration conf) { + conf.setInt(S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY, 0); + conf.setInt(S3GUARD_DDB_TABLE_CAPACITY_READ_KEY, 0); + } + + /** + * Get the configuration needed to create a table; extracts + * it from the filesystem then always patches it to be on demand. + * Why the patch? It means even if a cached FS has brought in + * some provisioned values, they get reset. + * @return a new configuration + */ + private Configuration getTableCreationConfig() { + Configuration conf = new Configuration(getFileSystem().getConf()); + enableOnDemand(conf); + return conf; + } + /** * This tests that after initialize() using an S3AFileSystem object, the * instance should have been initialized successfully, and tables are ACTIVE. @@ -272,9 +313,11 @@ public void testInitialize() throws IOException { final S3AFileSystem s3afs = this.fileSystem; final String tableName = getTestTableName("testInitialize"); - final Configuration conf = s3afs.getConf(); + Configuration conf = getFileSystem().getConf(); + enableOnDemand(conf); conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName); - try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) { + DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore(); + try { ddbms.initialize(s3afs); verifyTableInitialized(tableName, ddbms.getDynamoDB()); assertNotNull(ddbms.getTable()); @@ -285,7 +328,9 @@ public void testInitialize() throws IOException { " region as S3 bucket", expectedRegion, ddbms.getRegion()); + } finally { ddbms.destroy(); + ddbms.close(); } } @@ -297,7 +342,7 @@ public void testInitialize() throws IOException { public void testInitializeWithConfiguration() throws IOException { final String tableName = getTestTableName("testInitializeWithConfiguration"); - final Configuration conf = getFileSystem().getConf(); + final Configuration conf = getTableCreationConfig(); conf.unset(S3GUARD_DDB_TABLE_NAME_KEY); String savedRegion = conf.get(S3GUARD_DDB_REGION_KEY, getFileSystem().getBucketLocation()); @@ -316,7 +361,8 @@ public void testInitializeWithConfiguration() throws IOException { } // config region conf.set(S3GUARD_DDB_REGION_KEY, savedRegion); - try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) { + DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore(); + try { ddbms.initialize(conf); verifyTableInitialized(tableName, ddbms.getDynamoDB()); assertNotNull(ddbms.getTable()); @@ -324,7 +370,9 @@ public void testInitializeWithConfiguration() throws IOException { assertEquals("Unexpected key schema found!", keySchema(), ddbms.getTable().describe().getKeySchema()); + } finally { ddbms.destroy(); + ddbms.close(); } } @@ -434,13 +482,14 @@ public void testItemLacksVersion() throws Throwable { @Test public void testTableVersionRequired() throws Exception { String tableName = getTestTableName("testTableVersionRequired"); - Configuration conf = getFileSystem().getConf(); + Configuration conf = 
getTableCreationConfig(); int maxRetries = conf.getInt(S3GUARD_DDB_MAX_RETRIES, S3GUARD_DDB_MAX_RETRIES_DEFAULT); conf.setInt(S3GUARD_DDB_MAX_RETRIES, 3); conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName); - try(DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) { + DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore(); + try { ddbms.initialize(conf); Table table = verifyTableInitialized(tableName, ddbms.getDynamoDB()); table.deleteItem(VERSION_MARKER_PRIMARY_KEY); @@ -450,7 +499,9 @@ public void testTableVersionRequired() throws Exception { () -> ddbms.initTable()); conf.setInt(S3GUARD_DDB_MAX_RETRIES, maxRetries); + } finally { ddbms.destroy(); + ddbms.close(); } } @@ -461,10 +512,11 @@ public void testTableVersionRequired() throws Exception { @Test public void testTableVersionMismatch() throws Exception { String tableName = getTestTableName("testTableVersionMismatch"); - Configuration conf = getFileSystem().getConf(); + Configuration conf = getTableCreationConfig(); conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName); - try(DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) { + DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore(); + try { ddbms.initialize(conf); Table table = verifyTableInitialized(tableName, ddbms.getDynamoDB()); table.deleteItem(VERSION_MARKER_PRIMARY_KEY); @@ -474,7 +526,9 @@ public void testTableVersionMismatch() throws Exception { // create existing table intercept(IOException.class, E_INCOMPATIBLE_VERSION, () -> ddbms.initTable()); + } finally { ddbms.destroy(); + ddbms.close(); } } @@ -491,10 +545,18 @@ public void testFailNonexistentTable() throws IOException { getTestTableName("testFailNonexistentTable"); final S3AFileSystem s3afs = getFileSystem(); final Configuration conf = s3afs.getConf(); + enableOnDemand(conf); conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName); + String b = fsUri.getHost(); + clearBucketOption(conf, b, S3GUARD_DDB_TABLE_CREATE_KEY); + clearBucketOption(conf, b, S3_METADATA_STORE_IMPL); + clearBucketOption(conf, b, S3GUARD_DDB_TABLE_NAME_KEY); conf.unset(S3GUARD_DDB_TABLE_CREATE_KEY); try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) { ddbms.initialize(s3afs); + // if an exception was not raised, a table was created. + // So destroy it before failing. 
+      ddbms.destroy();
       fail("Should have failed as table does not exist and table auto-creation"
           + " is disabled");
     } catch (IOException ignored) {
@@ -606,31 +668,36 @@ public void testMovePopulatesAncestors() throws IOException {
   public void testProvisionTable() throws Exception {
     final String tableName = getTestTableName("testProvisionTable-"
         + UUID.randomUUID());
-    Configuration conf = getFileSystem().getConf();
+    final Configuration conf = getTableCreationConfig();
     conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
-
-    try(DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
+    conf.setInt(S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY, 2);
+    conf.setInt(S3GUARD_DDB_TABLE_CAPACITY_READ_KEY, 2);
+    DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore();
+    try {
       ddbms.initialize(conf);
       DynamoDB dynamoDB = ddbms.getDynamoDB();
-      final ProvisionedThroughputDescription oldProvision =
-          dynamoDB.getTable(tableName).describe().getProvisionedThroughput();
-      ddbms.provisionTable(oldProvision.getReadCapacityUnits() * 2,
-          oldProvision.getWriteCapacityUnits() * 2);
+      final DDBCapacities oldProvision = DDBCapacities.extractCapacities(
+          dynamoDB.getTable(tableName).describe().getProvisionedThroughput());
+      Assume.assumeFalse("Table is on-demand", oldProvision.isOnDemandTable());
+      long desiredReadCapacity = oldProvision.getRead() - 1;
+      long desiredWriteCapacity = oldProvision.getWrite() - 1;
+      ddbms.provisionTable(desiredReadCapacity,
+          desiredWriteCapacity);
       ddbms.initTable();
       // we have to wait until the provisioning settings are applied,
       // so until the table is ACTIVE again and not in UPDATING
       ddbms.getTable().waitForActive();
-      final ProvisionedThroughputDescription newProvision =
-          dynamoDB.getTable(tableName).describe().getProvisionedThroughput();
-      LOG.info("Old provision = {}, new provision = {}", oldProvision,
-          newProvision);
+      final DDBCapacities newProvision = DDBCapacities.extractCapacities(
+          dynamoDB.getTable(tableName).describe().getProvisionedThroughput());
       assertEquals("Check newly provisioned table read capacity units.",
-          oldProvision.getReadCapacityUnits() * 2,
-          newProvision.getReadCapacityUnits().longValue());
+          desiredReadCapacity,
+          newProvision.getRead());
       assertEquals("Check newly provisioned table write capacity units.",
-          oldProvision.getWriteCapacityUnits() * 2,
-          newProvision.getWriteCapacityUnits().longValue());
+          desiredWriteCapacity,
+          newProvision.getWrite());
+    } finally {
       ddbms.destroy();
+      ddbms.close();
     }
   }
 
@@ -639,9 +706,11 @@ public void testDeleteTable() throws Exception {
     final String tableName = getTestTableName("testDeleteTable");
     Path testPath = new Path(new Path(fsUri), "/" + tableName);
     final S3AFileSystem s3afs = getFileSystem();
-    final Configuration conf = s3afs.getConf();
+    final Configuration conf = getTableCreationConfig();
    conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
-    try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
+    enableOnDemand(conf);
+    DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore();
+    try {
       ddbms.initialize(s3afs);
       // we can list the empty table
       ddbms.listChildren(testPath);
@@ -649,23 +718,22 @@
       ddbms.destroy();
       verifyTableNotExist(tableName, dynamoDB);
 
-      // delete table once more; be ResourceNotFoundException swallowed silently
+      // delete table once more; the ResourceNotFoundException is
+      // swallowed silently
       ddbms.destroy();
       verifyTableNotExist(tableName, dynamoDB);
-      try {
-        // we can no longer list the destroyed table
-        ddbms.listChildren(testPath);
-        fail("Should have failed after the table is destroyed!");
-      } catch (IOException ignored) {
-      }
+      intercept(IOException.class, "",
+          "Should have failed after the table is destroyed!",
+          () -> ddbms.listChildren(testPath));
+    } finally {
       ddbms.destroy();
+      ddbms.close();
     }
   }
 
   @Test
   public void testTableTagging() throws IOException {
-    final Configuration conf = getFileSystem().getConf();
-
+    final Configuration conf = getTableCreationConfig();
     // clear all table tagging config before this test
     conf.getPropsWithPrefix(S3GUARD_DDB_TABLE_TAG).keySet().forEach(
         propKey -> conf.unset(S3GUARD_DDB_TABLE_TAG + propKey)
@@ -683,7 +751,8 @@ public void testTableTagging() throws IOException {
       conf.set(S3GUARD_DDB_TABLE_TAG + tagEntry.getKey(), tagEntry.getValue());
     }
 
-    try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
+    DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore();
+    try {
       ddbms.initialize(conf);
       assertNotNull(ddbms.getTable());
       assertEquals(tableName, ddbms.getTable().getTableName());
@@ -696,6 +765,9 @@ public void testTableTagging() throws IOException {
       for (Tag tag : tags) {
         Assert.assertEquals(tagMap.get(tag.getKey()), tag.getValue());
       }
+    } finally {
+      ddbms.destroy();
+      ddbms.close();
     }
   }
 
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolDynamoDB.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolDynamoDB.java
index 98c1e998ed..45c5e79fad 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolDynamoDB.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolDynamoDB.java
@@ -44,7 +44,6 @@
 import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_REGION_KEY;
 import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_TABLE_NAME_KEY;
 import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_TABLE_TAG;
-import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestDynamoTablePrefix;
 import static org.apache.hadoop.fs.s3a.S3AUtils.setBucketOption;
 import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.*;
 import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.*;
@@ -178,8 +177,8 @@ public void testDynamoDBInitDestroyCycle() throws Throwable {
     expectSuccess("Init command did not exit successfully - see output",
         initCmd,
         Init.NAME,
-        "-" + READ_FLAG, "2",
-        "-" + WRITE_FLAG, "2",
+        "-" + READ_FLAG, "0",
+        "-" + WRITE_FLAG, "0",
         "-" + META_FLAG, "dynamodb://" + testTableName,
         testS3Url);
 
     // Verify it exists
@@ -210,39 +209,19 @@
         testS3Url);
     assertTrue("No Dynamo diagnostics in output " + info,
         info.contains(DESCRIPTION));
 
     // get the current values to set again
     // play with the set-capacity option
+    String fsURI = getFileSystem().getUri().toString();
     DDBCapacities original = getCapacities();
-    String fsURI = getFileSystem().getUri().toString();
-    if (!original.isOnDemandTable()) {
-      // classic provisioned table
-      assertTrue("Wrong billing mode in " + info,
-          info.contains(BILLING_MODE_PROVISIONED));
-      String capacityOut = exec(newSetCapacity(),
-          SetCapacity.NAME,
-          fsURI);
-      LOG.info("Set Capacity output=\n{}", capacityOut);
-      capacityOut = exec(newSetCapacity(),
-          SetCapacity.NAME,
-          "-" + READ_FLAG, original.getReadStr(),
-          "-" + WRITE_FLAG, original.getWriteStr(),
-          fsURI);
-      LOG.info("Set Capacity output=\n{}", capacityOut);
-
} else { - // on demand table - assertTrue("Wrong billing mode in " + info, - info.contains(BILLING_MODE_PER_REQUEST)); - // on demand tables fail here, so expect that - intercept(IOException.class, E_ON_DEMAND_NO_SET_CAPACITY, - () -> exec(newSetCapacity(), - SetCapacity.NAME, + assertTrue("Wrong billing mode in " + info, + info.contains(BILLING_MODE_PER_REQUEST)); + // per-request tables fail here, so expect that + intercept(IOException.class, E_ON_DEMAND_NO_SET_CAPACITY, + () -> exec(newSetCapacity(), + SetCapacity.NAME, fsURI)); - } - - // that call does not change the values - original.checkEquals("unchanged", getCapacities()); // Destroy MetadataStore Destroy destroyCmd = new Destroy(fs.getConf()); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java index 47544f4eb6..55f4707fe4 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java @@ -44,7 +44,6 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.test.HadoopTestBase; -import static org.apache.hadoop.fs.s3a.S3ATestUtils.isMetadataStoreAuthoritative; import static org.apache.hadoop.fs.s3a.S3ATestUtils.metadataStorePersistsAuthoritativeBit; /**