From 9db61adeda59f3219068ff43a8388719d556e673 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Sun, 24 Apr 2022 17:23:19 +0100 Subject: [PATCH] HADOOP-16202. Enhanced openFile(): hadoop-aws changes. (#2584/3) S3A input stream support for the few fs.option.openfile settings. As well as supporting the read policy option and values, if the file length is declared in fs.option.openfile.length then no HEAD request will be issued when opening a file. This can cut a few tens of milliseconds off the operation. The patch adds a new openfile parameter/FS configuration option fs.s3a.input.async.drain.threshold (default: 16000). It declares the number of bytes remaining in the http input stream above which any operation to read and discard the rest of the stream, "draining", is executed asynchronously. This asynchronous draining offers some performance benefit on seek-heavy file IO. Contributed by Steve Loughran. Change-Id: I9b0626bbe635e9fd97ac0f463f5e7167e0111e39 --- .../dev-support/findbugs-exclude.xml | 5 + .../org/apache/hadoop/fs/s3a/Constants.java | 45 +- .../org/apache/hadoop/fs/s3a/Invoker.java | 28 + .../apache/hadoop/fs/s3a/S3AFileSystem.java | 283 ++++----- .../apache/hadoop/fs/s3a/S3AInputPolicy.java | 95 ++- .../apache/hadoop/fs/s3a/S3AInputStream.java | 275 +++++--- .../hadoop/fs/s3a/S3AInstrumentation.java | 12 +- .../hadoop/fs/s3a/S3AReadOpContext.java | 101 ++- .../hadoop/fs/s3a/S3ObjectAttributes.java | 15 +- .../org/apache/hadoop/fs/s3a/Statistic.java | 17 + .../fs/s3a/commit/CommitOperations.java | 8 +- .../fs/s3a/commit/files/PendingSet.java | 26 +- .../s3a/commit/files/SinglePendingCommit.java | 20 +- .../fs/s3a/impl/AbstractStoreOperation.java | 26 +- .../hadoop/fs/s3a/impl/CallableSupplier.java | 2 +- .../hadoop/fs/s3a/impl/InternalConstants.java | 21 +- .../hadoop/fs/s3a/impl/OpenFileSupport.java | 600 ++++++++++++++++++ .../hadoop/fs/s3a/s3guard/S3GuardTool.java | 4 +- .../s3a/select/InternalSelectConstants.java | 2 +- .../hadoop/fs/s3a/select/SelectTool.java | 4 +- .../statistics/S3AInputStreamStatistics.java | 7 + .../impl/EmptyS3AStatisticsContext.java | 4 + .../site/markdown/tools/hadoop-aws/index.md | 8 + .../fs/contract/s3a/ITestS3AContractOpen.java | 67 ++ .../fs/contract/s3a/ITestS3AContractSeek.java | 17 +- .../hadoop/fs/s3a/AbstractS3AMockTest.java | 4 + .../hadoop/fs/s3a/ITestS3AConfiguration.java | 11 - .../apache/hadoop/fs/s3a/S3ATestUtils.java | 4 +- .../fs/s3a/TestS3AInputStreamRetry.java | 17 +- .../apache/hadoop/fs/s3a/TestS3AUnbuffer.java | 5 +- .../fs/s3a/TestStreamChangeTracker.java | 2 +- .../fs/s3a/impl/TestOpenFileSupport.java | 429 +++++++++++++ .../fs/s3a/performance/ITestS3AOpenCost.java | 209 ++++++ .../fs/s3a/performance/OperationCost.java | 6 + .../scale/ITestS3AInputStreamPerformance.java | 58 +- .../fs/s3a/select/AbstractS3SelectTest.java | 2 +- .../hadoop/fs/s3a/select/ITestS3Select.java | 7 +- .../fs/s3a/select/ITestS3SelectMRJob.java | 4 +- 38 files changed, 2070 insertions(+), 380 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OpenFileSupport.java create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestOpenFileSupport.java create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3AOpenCost.java diff --git a/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml b/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml index d0496de538..e823840fd7 100644 --- a/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml +++ b/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml @@ -28,6 +28,11 @@ + + + + +