From 22f6d55b71e53fc25f72e18e89a2a647ff8c0dca Mon Sep 17 00:00:00 2001
From: Ankit Saurabh <112006204+sauraank@users.noreply.github.com>
Date: Thu, 2 Feb 2023 18:45:21 +0000
Subject: [PATCH] HADOOP-18246. Reduce lower limit on
fs.s3a.prefetch.block.size to 1 byte. (#5120)
The minimum value of fs.s3a.prefetch.block.size is now 1 byte.
Contributed by Ankit Saurabh
---
.../src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java | 3 +--
.../hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md | 1 +
.../src/site/markdown/tools/hadoop-aws/prefetching.md | 4 ++++
3 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 420a92788c..cb17b80fb6 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -531,8 +531,7 @@ public void initialize(URI name, Configuration originalConf)
this.prefetchEnabled = conf.getBoolean(PREFETCH_ENABLED_KEY, PREFETCH_ENABLED_DEFAULT);
long prefetchBlockSizeLong =
- longBytesOption(conf, PREFETCH_BLOCK_SIZE_KEY, PREFETCH_BLOCK_DEFAULT_SIZE,
- PREFETCH_BLOCK_DEFAULT_SIZE);
+ longBytesOption(conf, PREFETCH_BLOCK_SIZE_KEY, PREFETCH_BLOCK_DEFAULT_SIZE, 1);
if (prefetchBlockSizeLong > (long) Integer.MAX_VALUE) {
throw new IOException("S3A prefatch block size exceeds int limit");
}
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index 9de259e798..ae042b1619 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -1108,6 +1108,7 @@ options are covered in [Testing](./testing.md).
8MB
The size of a single prefetched block of data.
+ Decreasing this will increase the number of prefetches required, and may negatively impact performance.
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/prefetching.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/prefetching.md
index e966c2dce4..8bb85008e3 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/prefetching.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/prefetching.md
@@ -43,6 +43,10 @@ Multiple blocks may be read in parallel.
|`fs.s3a.prefetch.block.size` |Size of a block |`8M` |
|`fs.s3a.prefetch.block.count` |Number of blocks to prefetch |`8` |
+The default size of a block is 8MB, and the minimum allowed block size is 1 byte.
+Decreasing the block size will increase the number of blocks to be read for a file.
+A smaller block size may negatively impact performance as the number of prefetches required will increase.
+
### Key Components
`S3PrefetchingInputStream` - When prefetching is enabled, S3AFileSystem will return an instance of