From 22f6d55b71e53fc25f72e18e89a2a647ff8c0dca Mon Sep 17 00:00:00 2001 From: Ankit Saurabh <112006204+sauraank@users.noreply.github.com> Date: Thu, 2 Feb 2023 18:45:21 +0000 Subject: [PATCH] HADOOP-18246. Reduce lower limit on fs.s3a.prefetch.block.size to 1 byte. (#5120) The minimum value of fs.s3a.prefetch.block.size is now 1 Contributed by Ankit Saurabh --- .../src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java | 3 +-- .../hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md | 1 + .../src/site/markdown/tools/hadoop-aws/prefetching.md | 4 ++++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 420a92788c..cb17b80fb6 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -531,8 +531,7 @@ public void initialize(URI name, Configuration originalConf) this.prefetchEnabled = conf.getBoolean(PREFETCH_ENABLED_KEY, PREFETCH_ENABLED_DEFAULT); long prefetchBlockSizeLong = - longBytesOption(conf, PREFETCH_BLOCK_SIZE_KEY, PREFETCH_BLOCK_DEFAULT_SIZE, - PREFETCH_BLOCK_DEFAULT_SIZE); + longBytesOption(conf, PREFETCH_BLOCK_SIZE_KEY, PREFETCH_BLOCK_DEFAULT_SIZE, 1); if (prefetchBlockSizeLong > (long) Integer.MAX_VALUE) { throw new IOException("S3A prefatch block size exceeds int limit"); } diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index 9de259e798..ae042b1619 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -1108,6 +1108,7 @@ options are covered in [Testing](./testing.md). 8MB The size of a single prefetched block of data. + Decreasing this will increase the number of prefetches required, and may negatively impact performance. diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/prefetching.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/prefetching.md index e966c2dce4..8bb85008e3 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/prefetching.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/prefetching.md @@ -43,6 +43,10 @@ Multiple blocks may be read in parallel. |`fs.s3a.prefetch.block.size` |Size of a block |`8M` | |`fs.s3a.prefetch.block.count` |Number of blocks to prefetch |`8` | +The default size of a block is 8MB, and the minimum allowed block size is 1 byte. +Decreasing block size will increase the number of blocks to be read for a file. +A smaller block size may negatively impact performance as the number of prefetches required will increase. + ### Key Components `S3PrefetchingInputStream` - When prefetching is enabled, S3AFileSystem will return an instance of