diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
index dbbb3e1cc6..1e15b8ef09 100644
--- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
+++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
@@ -1055,8 +1055,10 @@
 <property>
   <name>fs.s3a.multipart.size</name>
-  <value>104857600</value>
-  <description>How big (in bytes) to split upload or copy operations up into.</description>
+  <value>100M</value>
+  <description>How big (in bytes) to split upload or copy operations up into.
+    A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
+  </description>
 </property>
 
@@ -1064,7 +1066,8 @@
   <value>2147483647</value>
   <description>How big (in bytes) to split upload or copy operations up into.
     This also controls the partition size in renamed files, as rename() involves
-    copying the source file(s)</description>
+    copying the source file(s).
+    A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.</description>
 </property>
 
@@ -1120,8 +1123,9 @@
 <property>
   <name>fs.s3a.block.size</name>
-  <value>33554432</value>
+  <value>32M</value>
   <description>Block size to use when reading files using s3a: file system.
+    A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
   </description>
 </property>
 
@@ -1183,10 +1187,12 @@
 <property>
   <name>fs.s3a.readahead.range</name>
-  <value>65536</value>
+  <value>64K</value>
   <description>Bytes to read ahead during a seek() before closing and
     re-opening the S3 HTTP connection. This option will be overridden if
-    any call to setReadahead() is made to an open stream.</description>
+    any call to setReadahead() is made to an open stream.
+    A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
+  </description>
 </property>
 
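The suffixed defaults above are byte-for-byte identical to the old numeric values: `Configuration.getLongBytes()` decodes K/M/G/T/P as traditional binary prefixes, i.e. powers of 1024. A minimal standalone sketch (the `SuffixDemo` class name is hypothetical, not part of this patch):

```java
import org.apache.hadoop.conf.Configuration;

// Sketch: Configuration.getLongBytes() treats the K/M/G/T/P suffixes as
// binary multipliers (K=2^10 ... P=2^50), so the new suffixed defaults
// decode to exactly the old numeric values.
public class SuffixDemo {
  public static void main(String[] args) {
    Configuration conf = new Configuration(false);
    conf.set("fs.s3a.multipart.size", "100M");
    conf.set("fs.s3a.block.size", "32M");
    conf.set("fs.s3a.readahead.range", "64K");

    System.out.println(conf.getLongBytes("fs.s3a.multipart.size", 0));  // 104857600
    System.out.println(conf.getLongBytes("fs.s3a.block.size", 0));      // 33554432
    System.out.println(conf.getLongBytes("fs.s3a.readahead.range", 0)); // 65536
  }
}
```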
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 6030fe4fc4..925abbcacd 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -183,10 +183,11 @@ public void initialize(URI name, Configuration conf) throws IOException {
           MIN_MULTIPART_THRESHOLD, DEFAULT_MIN_MULTIPART_THRESHOLD);
 
       //check but do not store the block size
-      longOption(conf, FS_S3A_BLOCK_SIZE, DEFAULT_BLOCKSIZE, 1);
+      longBytesOption(conf, FS_S3A_BLOCK_SIZE, DEFAULT_BLOCKSIZE, 1);
       enableMultiObjectsDelete = conf.getBoolean(ENABLE_MULTI_DELETE, true);
 
-      readAhead = longOption(conf, READAHEAD_RANGE, DEFAULT_READAHEAD_RANGE, 0);
+      readAhead = longBytesOption(conf, READAHEAD_RANGE,
+          DEFAULT_READAHEAD_RANGE, 0);
       storageStatistics = (S3AStorageStatistics)
           GlobalStorageStatistics.INSTANCE
               .put(S3AStorageStatistics.NAME,
@@ -357,6 +358,16 @@ AmazonS3 getAmazonS3Client() {
     return s3;
   }
 
+  /**
+   * Returns the read ahead range value used by this filesystem.
+   * @return the readahead range
+   */
+
+  @VisibleForTesting
+  long getReadAheadRange() {
+    return readAhead;
+  }
+
   /**
    * Get the input policy for this FS instance.
    * @return the input policy
@@ -1883,7 +1894,7 @@ private ObjectMetadata cloneObjectMetadata(ObjectMetadata source) {
    */
   @Deprecated
   public long getDefaultBlockSize() {
-    return getConf().getLong(FS_S3A_BLOCK_SIZE, DEFAULT_BLOCKSIZE);
+    return getConf().getLongBytes(FS_S3A_BLOCK_SIZE, DEFAULT_BLOCKSIZE);
   }
 
   @Override
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
index 56e0c37f3b..49f8862c3b 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
@@ -509,6 +509,27 @@ static long longOption(Configuration conf,
     return v;
   }
 
+  /**
+   * Get a long option >= the minimum allowed value, supporting memory
+   * suffixes K,M,G,T,P.
+   * @param conf configuration
+   * @param key key to look up
+   * @param defVal default value
+   * @param min minimum value
+   * @return the value
+   * @throws IllegalArgumentException if the value is below the minimum
+   */
+  static long longBytesOption(Configuration conf,
+      String key,
+      long defVal,
+      long min) {
+    long v = conf.getLongBytes(key, defVal);
+    Preconditions.checkArgument(v >= min,
+        String.format("Value of %s: %d is below the minimum value %d",
+            key, v, min));
+    return v;
+  }
+
   /**
    * Get a size property from the configuration: this property must
    * be at least equal to {@link Constants#MULTIPART_MIN_SIZE}.
@@ -521,7 +542,7 @@ static long longOption(Configuration conf,
    */
   public static long getMultipartSizeProperty(Configuration conf,
       String property, long defVal) {
-    long partSize = conf.getLong(property, defVal);
+    long partSize = conf.getLongBytes(property, defVal);
     if (partSize < MULTIPART_MIN_SIZE) {
       LOG.warn("{} must be at least 5 MB; configured value is {}",
           property, partSize);
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index 0eb36ef799..54a4ba906a 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -762,16 +762,20 @@ from placing its declaration on the command line.
 
 <property>
   <name>fs.s3a.multipart.size</name>
-  <value>104857600</value>
+  <value>100M</value>
   <description>How big (in bytes) to split upload or copy operations up into.
-    This also controls the partition size in renamed files, as rename() involves
-    copying the source file(s)</description>
+    A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
+  </description>
 </property>
 
 <property>
   <name>fs.s3a.multipart.threshold</name>
   <value>2147483647</value>
-  <description>Threshold before uploads or copies use parallel multipart operations.</description>
+  <description>How big (in bytes) to split upload or copy operations up into.
+    This also controls the partition size in renamed files, as rename() involves
+    copying the source file(s).
+    A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
+  </description>
 </property>
 
@@ -825,7 +829,7 @@ from placing its declaration on the command line.
 <property>
   <name>fs.s3a.block.size</name>
-  <value>33554432</value>
+  <value>32M</value>
   <description>Block size to use when reading files using s3a: file system.
   </description>
 </property>
@@ -859,7 +863,7 @@ from placing its declaration on the command line.
 <property>
   <name>fs.s3a.readahead.range</name>
-  <value>65536</value>
+  <value>64K</value>
   <description>Bytes to read ahead during a seek() before closing and
     re-opening the S3 HTTP connection. This option will be overridden if
     any call to setReadahead() is made to an open stream.</description>
 </property>
@@ -1029,9 +1033,9 @@ S3 endpoints, as disks are not used for intermediate data storage.
 
 <property>
   <name>fs.s3a.multipart.size</name>
-  <value>104857600</value>
-  <description>
-    How big (in bytes) to split upload or copy operations up into.
+  <value>100M</value>
+  <description>How big (in bytes) to split upload or copy operations up into.
+    A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
   </description>
 </property>
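The minimum-value check in the new helper surfaces as an IllegalArgumentException at initialize() time rather than as a silent misconfiguration. A standalone sketch of the same logic (the helper is mirrored here because the real longBytesOption() is package-private in S3AUtils; the class name is hypothetical):

```java
import com.google.common.base.Preconditions;
import org.apache.hadoop.conf.Configuration;

// Mirror of the new longBytesOption() helper, reproduced standalone
// because the real method is package-private.
public class LongBytesOptionDemo {

  static long longBytesOption(Configuration conf, String key,
      long defVal, long min) {
    long v = conf.getLongBytes(key, defVal);
    Preconditions.checkArgument(v >= min,
        String.format("Value of %s: %d is below the minimum value %d",
            key, v, min));
    return v;
  }

  public static void main(String[] args) {
    Configuration conf = new Configuration(false);
    conf.set("fs.s3a.block.size", "0K"); // decodes to 0

    // S3AFileSystem.initialize() validates the block size with min = 1,
    // so a zero value fails fast with:
    // IllegalArgumentException: Value of fs.s3a.block.size: 0 is below
    // the minimum value 1
    longBytesOption(conf, "fs.s3a.block.size", 32 * 1024 * 1024, 1);
  }
}
```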
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
index 6ae961391d..9163b1541d 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
@@ -380,7 +380,7 @@ public void shouldBeAbleToSwitchOnS3PathStyleAccessViaConfigProperty()
       byte[] file = ContractTestUtils.toAsciiByteArray("test file");
       ContractTestUtils.writeAndRead(fs,
           new Path("/path/style/access/testFile"), file, file.length,
-          conf.getInt(Constants.FS_S3A_BLOCK_SIZE, file.length), false, true);
+          (int) conf.getLongBytes(Constants.FS_S3A_BLOCK_SIZE, file.length), false, true);
     } catch (final AWSS3IOException e) {
       LOG.error("Caught exception: ", e);
       // Catch/pass standard path style access behaviour when live bucket
@@ -451,6 +451,16 @@ public void testDirectoryAllocatorRR() throws Throwable {
         tmp1.getParent(), tmp2.getParent());
   }
 
+  @Test
+  public void testReadAheadRange() throws Exception {
+    conf = new Configuration();
+    conf.set(Constants.READAHEAD_RANGE, "300K");
+    fs = S3ATestUtils.createTestFileSystem(conf);
+    assertNotNull(fs);
+    long readAheadRange = fs.getReadAheadRange();
+    assertEquals("Read Ahead Range Incorrect.", 300 * 1024, readAheadRange);
+  }
+
   @Test
   public void testUsernameFromUGI() throws Throwable {
     final String alice = "alice";
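With the patch applied, suffixed values work anywhere the affected options are set, not just in core-default.xml. A hypothetical client-side sketch (the bucket URI is a placeholder; running it needs valid S3 credentials):

```java
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

// Usage sketch: programmatic settings accept the same K/M/G/T/P suffixes
// as the XML defaults once the options are read via getLongBytes().
public class S3ASuffixUsage {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("fs.s3a.multipart.size", "128M");   // instead of 134217728
    conf.set("fs.s3a.readahead.range", "512K");  // instead of 524288

    try (FileSystem fs =
        FileSystem.get(URI.create("s3a://example-bucket/"), conf)) {
      System.out.println("block size: " + fs.getDefaultBlockSize());
    }
  }
}
```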