diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
index dbbb3e1cc6..1e15b8ef09 100644
--- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
+++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
@@ -1055,8 +1055,10 @@
   <name>fs.s3a.multipart.size</name>
-  <value>104857600</value>
-  <description>How big (in bytes) to split upload or copy operations up into.</description>
+  <value>100M</value>
+  <description>How big (in bytes) to split upload or copy operations up into.
+  A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
+  </description>
@@ -1064,7 +1066,8 @@
   <value>2147483647</value>
   <description>How big (in bytes) to split upload or copy operations up into.
   This also controls the partition size in renamed files, as rename() involves
-  copying the source file(s)
+  copying the source file(s).
+  A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
@@ -1120,8 +1123,9 @@
   <name>fs.s3a.block.size</name>
-  <value>33554432</value>
+  <value>32M</value>
   <description>Block size to use when reading files using s3a: file system.
+  A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
@@ -1183,10 +1187,12 @@
   <name>fs.s3a.readahead.range</name>
-  <value>65536</value>
+  <value>64K</value>
   <description>Bytes to read ahead during a seek() before closing and
   re-opening the S3 HTTP connection. This option will be overridden if
-  any call to setReadahead() is made to an open stream.</description>
+  any call to setReadahead() is made to an open stream.
+  A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
+  </description>
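
Note: the suffixed values above are parsed by Configuration.getLongBytes(),
which treats K, M, G, T and P as binary multipliers (case-insensitive), so
"100M" resolves to 104857600. A minimal sketch of the parsing behaviour;
the class name SuffixDemo is illustrative only, not part of this patch:

    import org.apache.hadoop.conf.Configuration;

    public class SuffixDemo {
      public static void main(String[] args) {
        // false: start from an empty Configuration, skip default resources.
        Configuration conf = new Configuration(false);
        conf.set("fs.s3a.multipart.size", "100M");
        conf.set("fs.s3a.readahead.range", "64K");
        // prints 104857600 (100 * 1024 * 1024)
        System.out.println(conf.getLongBytes("fs.s3a.multipart.size", 0));
        // prints 65536 (64 * 1024)
        System.out.println(conf.getLongBytes("fs.s3a.readahead.range", 0));
      }
    }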
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 6030fe4fc4..925abbcacd 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -183,10 +183,11 @@ public void initialize(URI name, Configuration conf) throws IOException {
MIN_MULTIPART_THRESHOLD, DEFAULT_MIN_MULTIPART_THRESHOLD);
//check but do not store the block size
- longOption(conf, FS_S3A_BLOCK_SIZE, DEFAULT_BLOCKSIZE, 1);
+ longBytesOption(conf, FS_S3A_BLOCK_SIZE, DEFAULT_BLOCKSIZE, 1);
enableMultiObjectsDelete = conf.getBoolean(ENABLE_MULTI_DELETE, true);
- readAhead = longOption(conf, READAHEAD_RANGE, DEFAULT_READAHEAD_RANGE, 0);
+ readAhead = longBytesOption(conf, READAHEAD_RANGE,
+ DEFAULT_READAHEAD_RANGE, 0);
storageStatistics = (S3AStorageStatistics)
GlobalStorageStatistics.INSTANCE
.put(S3AStorageStatistics.NAME,
@@ -357,6 +358,16 @@ AmazonS3 getAmazonS3Client() {
return s3;
}
+  /**
+   * Returns the readahead range used by this filesystem.
+   * @return the readahead range, in bytes
+   */
+  @VisibleForTesting
+  long getReadAheadRange() {
+    return readAhead;
+  }
+
/**
* Get the input policy for this FS instance.
* @return the input policy
@@ -1883,7 +1894,7 @@ private ObjectMetadata cloneObjectMetadata(ObjectMetadata source) {
*/
@Deprecated
public long getDefaultBlockSize() {
- return getConf().getLong(FS_S3A_BLOCK_SIZE, DEFAULT_BLOCKSIZE);
+ return getConf().getLongBytes(FS_S3A_BLOCK_SIZE, DEFAULT_BLOCKSIZE);
}
@Override
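
With getDefaultBlockSize() now reading the option via getLongBytes(), the
suffixed and plain numeric spellings of fs.s3a.block.size are
interchangeable. A short sketch under that assumption:

    Configuration conf = new Configuration(false);
    conf.set("fs.s3a.block.size", "32M");          // new suffixed form
    long a = conf.getLongBytes("fs.s3a.block.size", 0);
    conf.setLong("fs.s3a.block.size", 33554432L);  // old numeric form
    long b = conf.getLongBytes("fs.s3a.block.size", 0);
    // a == b == 33554432L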
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
index 56e0c37f3b..49f8862c3b 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
@@ -509,6 +509,27 @@ static long longOption(Configuration conf,
return v;
}
+ /**
+   * Get a long option >= the minimum allowed value, supporting
+   * the byte suffixes K,M,G,T,P.
+ * @param conf configuration
+ * @param key key to look up
+ * @param defVal default value
+ * @param min minimum value
+ * @return the value
+ * @throws IllegalArgumentException if the value is below the minimum
+ */
+ static long longBytesOption(Configuration conf,
+ String key,
+ long defVal,
+ long min) {
+ long v = conf.getLongBytes(key, defVal);
+ Preconditions.checkArgument(v >= min,
+ String.format("Value of %s: %d is below the minimum value %d",
+ key, v, min));
+ return v;
+ }
+
/**
* Get a size property from the configuration: this property must
* be at least equal to {@link Constants#MULTIPART_MIN_SIZE}.
@@ -521,7 +542,7 @@ static long longOption(Configuration conf,
*/
public static long getMultipartSizeProperty(Configuration conf,
String property, long defVal) {
- long partSize = conf.getLong(property, defVal);
+ long partSize = conf.getLongBytes(property, defVal);
if (partSize < MULTIPART_MIN_SIZE) {
LOG.warn("{} must be at least 5 MB; configured value is {}",
property, partSize);
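
The new longBytesOption() helper keeps the fail-fast contract of
longOption(): a value resolving below the minimum is rejected with an
IllegalArgumentException. An illustrative sketch, assuming a caller in the
org.apache.hadoop.fs.s3a package (the helper is package-private):

    Configuration conf = new Configuration(false);
    conf.set("fs.s3a.readahead.range", "-1");
    try {
      S3AUtils.longBytesOption(conf, "fs.s3a.readahead.range", 65536, 0);
    } catch (IllegalArgumentException expected) {
      // "Value of fs.s3a.readahead.range: -1 is below the minimum value 0"
    }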
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index 0eb36ef799..54a4ba906a 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -762,16 +762,20 @@ from placing its declaration on the command line.
   <name>fs.s3a.multipart.size</name>
-  <value>104857600</value>
+  <value>100M</value>
   <description>How big (in bytes) to split upload or copy operations up into.
-  This also controls the partition size in renamed files, as rename() involves
-  copying the source file(s)</description>
+  A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
+  </description>
   <name>fs.s3a.multipart.threshold</name>
   <value>2147483647</value>
-  <description>Threshold before uploads or copies use parallel multipart operations.</description>
+  <description>How big (in bytes) to split upload or copy operations up into.
+  This also controls the partition size in renamed files, as rename() involves
+  copying the source file(s).
+  A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
+  </description>
@@ -825,7 +829,7 @@ from placing its declaration on the command line.
   <name>fs.s3a.block.size</name>
-  <value>33554432</value>
+  <value>32M</value>
   <description>Block size to use when reading files using s3a: file system.</description>
@@ -859,7 +863,7 @@ from placing its declaration on the command line.
   <name>fs.s3a.readahead.range</name>
-  <value>65536</value>
+  <value>64K</value>
   <description>Bytes to read ahead during a seek() before closing and
   re-opening the S3 HTTP connection. This option will be overridden if
   any call to setReadahead() is made to an open stream.
@@ -1029,9 +1033,9 @@ S3 endpoints, as disks are not used for intermediate data storage.
   <name>fs.s3a.multipart.size</name>
-  <value>104857600</value>
-  <description>
-  How big (in bytes) to split upload or copy operations up into.
+  <value>100M</value>
+  <description>How big (in bytes) to split upload or copy operations up into.
+  A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
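
As the descriptions above note, fs.s3a.readahead.range is only a default:
a setReadahead() call on an open stream takes precedence for that stream.
A hedged sketch, assuming fs is an initialized S3A filesystem and
s3a://bucket/key is a placeholder path:

    try (FSDataInputStream in = fs.open(new Path("s3a://bucket/key"))) {
      in.setReadahead(512 * 1024L);  // 512K for this stream only
      // subsequent reads through 'in' use the per-stream readahead
    }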
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
index 6ae961391d..9163b1541d 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
@@ -380,7 +380,7 @@ public void shouldBeAbleToSwitchOnS3PathStyleAccessViaConfigProperty()
byte[] file = ContractTestUtils.toAsciiByteArray("test file");
ContractTestUtils.writeAndRead(fs,
new Path("/path/style/access/testFile"), file, file.length,
- conf.getInt(Constants.FS_S3A_BLOCK_SIZE, file.length), false, true);
+          (int) conf.getLongBytes(Constants.FS_S3A_BLOCK_SIZE, file.length),
+          false, true);
} catch (final AWSS3IOException e) {
LOG.error("Caught exception: ", e);
// Catch/pass standard path style access behaviour when live bucket
@@ -451,6 +451,17 @@ public void testDirectoryAllocatorRR() throws Throwable {
tmp1.getParent(), tmp2.getParent());
}
+  @Test
+  public void testReadAheadRange() throws Exception {
+    conf = new Configuration();
+    conf.set(Constants.READAHEAD_RANGE, "300K");
+    fs = S3ATestUtils.createTestFileSystem(conf);
+    assertNotNull(fs);
+    // getReadAheadRange() returns a primitive long; assert on the value directly.
+    assertEquals("readahead range incorrect", 300 * 1024, fs.getReadAheadRange());
+  }
+
@Test
public void testUsernameFromUGI() throws Throwable {
final String alice = "alice";