From 6c6d1b64d4a7cd5288fcded78043acaf23228f96 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 2 Nov 2021 13:26:16 +0000 Subject: [PATCH] HADOOP-17928. Syncable: S3A to warn and downgrade (#3585) This switches the default behavior of S3A output streams to warning that Syncable.hsync() or hflush() have been called; it's not considered an error unless the defaults are overridden. This avoids breaking applications which call the APIs, at the risk of people trying to use S3 as a safe store of streamed data (HBase WALs, audit logs etc). Contributed by Steve Loughran. --- .../src/main/resources/core-default.xml | 11 ++++- .../org/apache/hadoop/fs/s3a/Constants.java | 2 +- .../tools/hadoop-aws/troubleshooting_s3a.md | 40 ++++++++++++++----- .../fs/s3a/TestS3ABlockOutputStream.java | 4 ++ 4 files changed, 44 insertions(+), 13 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index f2c04427d2..27c86bbc9a 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -2205,7 +2205,16 @@ - + + fs.s3a.downgrade.syncable.exceptions + true + + Warn but continue when applications use Syncable.hsync when writing + to S3A. + + + + fs.AbstractFileSystem.wasb.impl org.apache.hadoop.fs.azure.Wasb diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 0eadc35c3c..ac98af5ada 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -387,7 +387,7 @@ private Constants() { * Value: {@value}. 
*/ public static final boolean DOWNGRADE_SYNCABLE_EXCEPTIONS_DEFAULT = - false; + true; /** * The capacity of executor queues for operations other than block diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md index 6f55d9effe..d55e522910 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md @@ -924,30 +924,48 @@ connector isn't saving any data at all. The `Syncable` API, especially the `hsync()` call, are critical for applications such as HBase to safely persist data. -The S3A connector throws an `UnsupportedOperationException` when these API calls -are made, because the guarantees absolutely cannot be met: nothing is being flushed -or saved. +When configured to do so, the S3A connector throws an `UnsupportedOperationException` +when these API calls are made, because the API guarantees absolutely cannot be met: +_nothing is being flushed or saved_. -* Applications which intend to invoke the Syncable APIs call `hasCapability("hsync")` on +* Applications which intend to invoke the Syncable APIs should call `hasCapability("hsync")` on the stream to see if they are supported. * Or catch and downgrade `UnsupportedOperationException`. -These recommendations _apply to all filesystems_. +These recommendations _apply to all filesystems_. -To downgrade the S3A connector to simply warning of the use of +For consistency with other filesystems, S3A output streams +do not by default reject the `Syncable` calls; instead +they print a warning of their use. + + +The count of invocations of the two APIs is collected in the S3A filesystem +Statistics/IOStatistics and so their use can be monitored. 
+ +To switch the S3A connector to rejecting all use of `hsync()` or `hflush()` calls, set the option -`fs.s3a.downgrade.syncable.exceptions` to true. +`fs.s3a.downgrade.syncable.exceptions` to `false`. ```xml fs.s3a.downgrade.syncable.exceptions - true + false ``` -The count of invocations of the two APIs are collected -in the S3A filesystem Statistics/IOStatistics and so -their use can be monitored. +Regardless of the setting, the `Syncable` API calls do not work. +Telling the store to *not* downgrade the calls is a way to: +1. Prevent applications which require Syncable to work from being deployed + against S3. +2. Identify applications which are making the calls even though they don't + need to. These applications can then be fixed -something which may take + time. + +Put differently: it is safest to disable downgrading syncable exceptions. +However, enabling the downgrade stops applications unintentionally using the API +from breaking. + +*Tip*: try setting it to `false` in staging environments to see what breaks. ### `RemoteFileChangedException` and read-during-overwrite diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ABlockOutputStream.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ABlockOutputStream.java index 9ae24c19f3..21f268dfb2 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ABlockOutputStream.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ABlockOutputStream.java @@ -141,6 +141,10 @@ public void testCallingCloseAfterCallingAbort() throws Exception { */ @Test public void testSyncableUnsupported() throws Exception { + final S3ABlockOutputStream.BlockOutputStreamBuilder + builder = mockS3ABuilder(); + builder.withDowngradeSyncableExceptions(false); + stream = spy(new S3ABlockOutputStream(builder)); intercept(UnsupportedOperationException.class, () -> stream.hflush()); intercept(UnsupportedOperationException.class, () -> stream.hsync()); }