From 9e53ed3602a652e887051cd50bb6938fd165af76 Mon Sep 17 00:00:00 2001 From: Mehakmeet Singh Date: Tue, 15 Nov 2022 19:58:41 +0530 Subject: [PATCH] HADOOP-18528. Disable abfs prefetching by default (#5134) Disables block prefetching on ABFS InputStreams, by setting fs.azure.enable.readahead to false in core-default.xml and the matching java constant. This prevents HADOOP-18521. ABFS ReadBufferManager buffer sharing across concurrent HTTP requests. Once a fix for that is committed, this change can be reverted. Contributed by Mehakmeet Singh. --- .../hadoop-common/src/main/resources/core-default.xml | 7 +++++++ .../fs/azurebfs/constants/FileSystemConfigurations.java | 2 +- .../fs/azurebfs/services/AbfsInputStreamContext.java | 2 +- .../azurebfs/contract/ITestAbfsFileSystemContractSeek.java | 2 ++ .../hadoop/fs/azurebfs/services/TestAbfsInputStream.java | 2 ++ 5 files changed, 13 insertions(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index d5cb5cceec..9e6bdc2433 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -2141,6 +2141,13 @@ The switch to turn S3A auditing on or off. The AbstractFileSystem for gs: uris. + + fs.azure.enable.readahead + false + Disable readahead/prefetching in AbfsInputStream. + See HADOOP-18521 + + io.seqfile.compress.blocksize 1000000 diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java index 42f3b7503e..097285bb48 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java @@ -106,7 +106,7 @@ public final class FileSystemConfigurations { public static final boolean DEFAULT_ABFS_LATENCY_TRACK = false; public static final long DEFAULT_SAS_TOKEN_RENEW_PERIOD_FOR_STREAMS_IN_SECONDS = 120; - public static final boolean DEFAULT_ENABLE_READAHEAD = true; + public static final boolean DEFAULT_ENABLE_READAHEAD = false; public static final String DEFAULT_FS_AZURE_USER_AGENT_PREFIX = EMPTY_STRING; public static final String DEFAULT_VALUE_UNKNOWN = "UNKNOWN"; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStreamContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStreamContext.java index 05afc7b985..b479c22fce 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStreamContext.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStreamContext.java @@ -35,7 +35,7 @@ public class AbfsInputStreamContext extends AbfsStreamContext { private boolean tolerateOobAppends; - private boolean isReadAheadEnabled = true; + private boolean isReadAheadEnabled = false; private boolean alwaysReadBufferSize; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractSeek.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractSeek.java index f7fe503979..aaf47f7a9c 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractSeek.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractSeek.java @@ -34,6 +34,7 @@ import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_READ_AHEAD_RANGE; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_READ_BUFFER_SIZE; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_READAHEAD; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.MIN_BUFFER_SIZE; import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; @@ -68,6 +69,7 @@ protected Configuration createConfiguration() { protected AbstractFSContract createContract(final Configuration conf) { conf.setInt(AZURE_READ_AHEAD_RANGE, MIN_BUFFER_SIZE); conf.setInt(AZURE_READ_BUFFER_SIZE, MIN_BUFFER_SIZE); + conf.setBoolean(FS_AZURE_ENABLE_READAHEAD, true); return new AbfsFileSystemContract(conf, isSecure); } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsInputStream.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsInputStream.java index b5ae9b7378..69795ee5bd 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsInputStream.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsInputStream.java @@ -106,6 +106,7 @@ private AbfsClient getMockAbfsClient() { private AbfsInputStream getAbfsInputStream(AbfsClient mockAbfsClient, String fileName) throws IOException { AbfsInputStreamContext inputStreamContext = new AbfsInputStreamContext(-1); + inputStreamContext.isReadAheadEnabled(true); // Create AbfsInputStream with the client instance AbfsInputStream inputStream = new AbfsInputStream( mockAbfsClient, @@ -131,6 +132,7 @@ public AbfsInputStream getAbfsInputStream(AbfsClient abfsClient, boolean alwaysReadBufferSize, int readAheadBlockSize) throws IOException { AbfsInputStreamContext inputStreamContext = new AbfsInputStreamContext(-1); + inputStreamContext.isReadAheadEnabled(true); // Create AbfsInputStream with the client instance AbfsInputStream inputStream = new AbfsInputStream( abfsClient,