HADOOP-11183. Memory-based S3AOutputstream. (Thomas Demoor via stevel)

This commit is contained in:
Steve Loughran 2015-03-03 16:18:39 -08:00
parent e17e5ba9d7
commit 15b7076ad5
7 changed files with 570 additions and 17 deletions

View File

@ -667,6 +667,8 @@ Release 2.7.0 - UNRELEASED
HADOOP-11620. Add support for load balancing across a group of KMS for HA.
(Arun Suresh via wang)
HADOOP-11183. Memory-based S3AOutputstream. (Thomas Demoor via stevel)
BUG FIXES
HADOOP-11512. Use getTrimmedStrings when reading serialization keys

View File

@ -763,13 +763,13 @@ for ldap providers in the same way as above does.
<property>
<name>fs.s3a.connection.establish.timeout</name>
<value>5000</value>
<description>Socket connection setup timeout in seconds.</description>
<description>Socket connection setup timeout in milliseconds.</description>
</property>
<property>
<name>fs.s3a.connection.timeout</name>
<value>50000</value>
<description>Socket connection timeout in seconds.</description>
<description>Socket connection timeout in milliseconds.</description>
</property>
<property>
@ -845,6 +845,22 @@ for ldap providers in the same way as above does.
uploads to.</description>
</property>
<property>
<name>fs.s3a.fast.upload</name>
<value>false</value>
<description>Upload directly from memory instead of buffering to
disk first. Memory usage and parallelism can be controlled as up to
fs.s3a.multipart.size memory is consumed for each (part)upload actively
uploading (fs.s3a.threads.max) or queueing (fs.s3a.max.total.tasks)</description>
</property>
<property>
<name>fs.s3a.fast.buffer.size</name>
<value>1048576</value>
<description>Size of initial memory buffer in bytes allocated for an
upload. No effect if fs.s3a.fast.upload is false.</description>
</property>
<property>
<name>fs.s3a.impl</name>
<value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>

View File

@ -83,6 +83,14 @@ public class Constants {
// comma separated list of directories
public static final String BUFFER_DIR = "fs.s3a.buffer.dir";
// should we upload directly from memory rather than using a file buffer
public static final String FAST_UPLOAD = "fs.s3a.fast.upload";
public static final boolean DEFAULT_FAST_UPLOAD = false;
//initial size of memory buffer for a fast upload
public static final String FAST_BUFFER_SIZE = "fs.s3a.fast.buffer.size";
public static final int DEFAULT_FAST_BUFFER_SIZE = 1048576; //1MB
// private | public-read | public-read-write | authenticated-read |
// log-delivery-write | bucket-owner-read | bucket-owner-full-control
public static final String CANNED_ACL = "fs.s3a.acl.default";

View File

@ -0,0 +1,413 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
import com.amazonaws.AmazonClientException;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.event.ProgressEvent;
import com.amazonaws.event.ProgressListener;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.AbortMultipartUploadRequest;
import com.amazonaws.services.s3.model.CannedAccessControlList;
import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest;
import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.PartETag;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.PutObjectResult;
import com.amazonaws.services.s3.model.UploadPartRequest;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.util.Progressable;
import org.slf4j.Logger;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ThreadPoolExecutor;
/**
 * Upload files/parts asap directly from a memory buffer (instead of buffering
 * to a file).
 * <p/>
 * Uploads are managed low-level rather than through the AWS TransferManager.
 * This allows for uploading each part of a multi-part upload as soon as
 * the bytes are in memory, rather than waiting until the file is closed.
 * <p/>
 * Unstable: statistics and error handling might evolve
 */
@InterfaceStability.Unstable
public class S3AFastOutputStream extends OutputStream {

  private static final Logger LOG = S3AFileSystem.LOG;
  private final String key;
  private final String bucket;
  private final AmazonS3Client client;
  // capped to int because a ByteArrayOutputStream cannot exceed Integer.MAX_VALUE
  private final int partSize;
  private final int multiPartThreshold;
  private final S3AFileSystem fs;
  private final CannedAccessControlList cannedACL;
  private final FileSystem.Statistics statistics;
  private final String serverSideEncryptionAlgorithm;
  private final ProgressListener progressListener;
  private final ListeningExecutorService executorService;
  // null until the stream grows past multiPartThreshold and a MPU is initiated
  private MultiPartUpload multiPartUpload;
  private boolean closed;
  // in-memory accumulation buffer; replaced/reset each time a part is submitted
  private ByteArrayOutputStream buffer;
  // number of bytes at which the buffer is flushed as an upload (part)
  private int bufferLimit;

  /**
   * Creates a fast OutputStream that uploads to S3 from memory.
   * For MultiPartUploads, as soon as sufficient bytes have been written to
   * the stream a part is uploaded immediately (by using the low-level
   * multi-part upload API on the AmazonS3Client).
   *
   * @param client AmazonS3Client used for S3 calls
   * @param fs S3AFilesystem
   * @param bucket S3 bucket name
   * @param key S3 key name
   * @param progress report progress in order to prevent timeouts
   * @param statistics track FileSystem.Statistics on the performed operations
   * @param cannedACL used CannedAccessControlList
   * @param serverSideEncryptionAlgorithm algorithm for server side encryption
   * @param partSize size of a single part in a multi-part upload (except
   * last part)
   * @param multiPartThreshold files at least this size use multi-part upload
   * @param threadPoolExecutor executor service used for uploads
   * @throws IOException on any problem
   */
  public S3AFastOutputStream(AmazonS3Client client, S3AFileSystem fs,
      String bucket, String key, Progressable progress,
      FileSystem.Statistics statistics, CannedAccessControlList cannedACL,
      String serverSideEncryptionAlgorithm, long partSize,
      long multiPartThreshold, ThreadPoolExecutor threadPoolExecutor)
      throws IOException {
    this.bucket = bucket;
    this.key = key;
    this.client = client;
    this.fs = fs;
    this.cannedACL = cannedACL;
    this.statistics = statistics;
    this.serverSideEncryptionAlgorithm = serverSideEncryptionAlgorithm;
    //Ensure limit as ByteArrayOutputStream size cannot exceed Integer.MAX_VALUE
    if (partSize > Integer.MAX_VALUE) {
      this.partSize = Integer.MAX_VALUE;
      LOG.warn("s3a: MULTIPART_SIZE capped to ~2.14GB (maximum allowed size " +
          "when using 'FAST_UPLOAD = true')");
    } else {
      this.partSize = (int) partSize;
    }
    if (multiPartThreshold > Integer.MAX_VALUE) {
      this.multiPartThreshold = Integer.MAX_VALUE;
      LOG.warn("s3a: MIN_MULTIPART_THRESHOLD capped to ~2.14GB (maximum " +
          "allowed size when using 'FAST_UPLOAD = true')");
    } else {
      this.multiPartThreshold = (int) multiPartThreshold;
    }
    // until a multi-part upload starts, flush only once the single-put
    // threshold is reached
    this.bufferLimit = this.multiPartThreshold;
    this.closed = false;
    int initialBufferSize = this.fs.getConf()
        .getInt(Constants.FAST_BUFFER_SIZE, Constants.DEFAULT_FAST_BUFFER_SIZE);
    // reject zero as well as negatives: the warning promises "positive"
    if (initialBufferSize <= 0) {
      LOG.warn("s3a: FAST_BUFFER_SIZE should be a positive number. Using " +
          "default value");
      initialBufferSize = Constants.DEFAULT_FAST_BUFFER_SIZE;
    } else if (initialBufferSize > this.bufferLimit) {
      LOG.warn("s3a: automatically adjusting FAST_BUFFER_SIZE to not " +
          "exceed MIN_MULTIPART_THRESHOLD");
      initialBufferSize = this.bufferLimit;
    }
    this.buffer = new ByteArrayOutputStream(initialBufferSize);
    this.executorService = MoreExecutors.listeningDecorator(threadPoolExecutor);
    this.multiPartUpload = null;
    this.progressListener = new ProgressableListener(progress);
    if (LOG.isDebugEnabled()){
      LOG.debug("Initialized S3AFastOutputStream for bucket '{}' key '{}'",
          bucket, key);
    }
  }

  /**
   * Writes a byte to the memory buffer. If this causes the buffer to reach
   * its limit, the actual upload is submitted to the threadpool.
   * @param b the int of which the lowest byte is written
   * @throws IOException on any problem
   */
  @Override
  public synchronized void write(int b) throws IOException {
    buffer.write(b);
    if (buffer.size() == bufferLimit) {
      uploadBuffer();
    }
  }

  /**
   * Writes a range of bytes from to the memory buffer. If this causes the
   * buffer to reach its limit, the actual upload is submitted to the
   * threadpool and the remainder of the array is written to memory
   * (recursively).
   * @param b byte array containing
   * @param off offset in array where to start
   * @param len number of bytes to be written
   * @throws IOException on any problem
   */
  @Override
  public synchronized void write(byte b[], int off, int len)
      throws IOException {
    if (b == null) {
      throw new NullPointerException();
    } else if ((off < 0) || (off > b.length) || (len < 0) ||
        ((off + len) > b.length) || ((off + len) < 0)) {
      throw new IndexOutOfBoundsException();
    } else if (len == 0) {
      return;
    }
    if (buffer.size() + len < bufferLimit) {
      buffer.write(b, off, len);
    } else {
      // fill the buffer exactly to its limit, flush it, then recurse on
      // whatever is left of the caller's array (possibly zero bytes)
      int firstPart = bufferLimit - buffer.size();
      buffer.write(b, off, firstPart);
      uploadBuffer();
      this.write(b, off + firstPart, len - firstPart);
    }
  }

  /**
   * Flush the full in-memory buffer as one or more asynchronous part uploads.
   * On the first call this initiates the multi-part upload and drains the
   * accumulated threshold-sized buffer in partSize chunks; afterwards the
   * buffer is exactly one part and is uploaded as-is.
   */
  private synchronized void uploadBuffer() throws IOException {
    if (multiPartUpload == null) {
      multiPartUpload = initiateMultiPartUpload();
      /* Upload the existing buffer if it exceeds partSize. This possibly
      requires multiple parts! */
      final byte[] allBytes = buffer.toByteArray();
      buffer = null; //earlier gc?
      if (LOG.isDebugEnabled()) {
        LOG.debug("Total length of initial buffer: {}", allBytes.length);
      }
      int processedPos = 0;
      // allBytes.length == multiPartThreshold here, since the buffer is only
      // flushed when it reaches bufferLimit (== multiPartThreshold)
      while ((multiPartThreshold - processedPos) >= partSize) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Initial buffer: processing from byte {} to byte {}",
              processedPos, (processedPos + partSize - 1));
        }
        multiPartUpload.uploadPartAsync(new ByteArrayInputStream(allBytes,
            processedPos, partSize), partSize);
        processedPos += partSize;
      }
      //resize and reset stream
      bufferLimit = partSize;
      buffer = new ByteArrayOutputStream(bufferLimit);
      // carry over the tail that did not fill a whole part
      buffer.write(allBytes, processedPos, multiPartThreshold - processedPos);
    } else {
      //upload next part
      multiPartUpload.uploadPartAsync(new ByteArrayInputStream(buffer
          .toByteArray()), partSize);
      buffer.reset();
    }
  }

  /**
   * Close the stream, uploading any remaining data. Small streams are sent
   * with a single PUT; otherwise the final part is uploaded and the
   * multi-part upload completed. Idempotent: subsequent calls are no-ops.
   * @throws IOException if any upload fails or is interrupted
   */
  @Override
  public synchronized void close() throws IOException {
    if (closed) {
      return;
    }
    closed = true;
    try {
      if (multiPartUpload == null) {
        putObject();
      } else {
        if (buffer.size() > 0) {
          //send last part
          multiPartUpload.uploadPartAsync(new ByteArrayInputStream(buffer
              .toByteArray()), buffer.size());
        }
        final List<PartETag> partETags = multiPartUpload
            .waitForAllPartUploads();
        multiPartUpload.complete(partETags);
      }
      // only reached when the upload (single or multi-part) succeeded
      statistics.incrementWriteOps(1);
      // This will delete unnecessary fake parent directories
      fs.finishedWrite(key);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Upload complete for bucket '{}' key '{}'", bucket, key);
      }
    } finally {
      buffer = null;
      super.close();
    }
  }

  /** Build object metadata, applying server-side encryption if configured. */
  private ObjectMetadata createDefaultMetadata() {
    ObjectMetadata om = new ObjectMetadata();
    if (StringUtils.isNotBlank(serverSideEncryptionAlgorithm)) {
      om.setServerSideEncryption(serverSideEncryptionAlgorithm);
    }
    return om;
  }

  /**
   * Start a multi-part upload, translating AWS SDK failures into IOExceptions.
   */
  private MultiPartUpload initiateMultiPartUpload() throws IOException {
    final ObjectMetadata om = createDefaultMetadata();
    final InitiateMultipartUploadRequest initiateMPURequest =
        new InitiateMultipartUploadRequest(bucket, key, om);
    initiateMPURequest.setCannedACL(cannedACL);
    try {
      return new MultiPartUpload(
          client.initiateMultipartUpload(initiateMPURequest).getUploadId());
    } catch (AmazonServiceException ase) {
      throw new IOException("Unable to initiate MultiPartUpload (server side)" +
          ": " + ase, ase);
    } catch (AmazonClientException ace) {
      throw new IOException("Unable to initiate MultiPartUpload (client side)" +
          ": " + ace, ace);
    }
  }

  /**
   * Upload the whole buffer as a single object (used for streams smaller
   * than the multi-part threshold). Blocks until the PUT completes.
   * @throws IOException if the upload fails or the wait is interrupted
   */
  private void putObject() throws IOException {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Executing regular upload for bucket '{}' key '{}'", bucket,
          key);
    }
    final ObjectMetadata om = createDefaultMetadata();
    om.setContentLength(buffer.size());
    final PutObjectRequest putObjectRequest = new PutObjectRequest(bucket, key,
        new ByteArrayInputStream(buffer.toByteArray()), om);
    putObjectRequest.setCannedAcl(cannedACL);
    putObjectRequest.setGeneralProgressListener(progressListener);
    ListenableFuture<PutObjectResult> putObjectResult =
        executorService.submit(new Callable<PutObjectResult>() {
          @Override
          public PutObjectResult call() throws Exception {
            return client.putObject(putObjectRequest);
          }
        });
    //wait for completion
    try {
      putObjectResult.get();
    } catch (InterruptedException ie) {
      LOG.warn("Interrupted object upload:" + ie, ie);
      Thread.currentThread().interrupt();
      // Fail the close(): the PUT has not (verifiably) completed, so the
      // caller must not see this write as successful.
      throw new IOException("Interrupted object upload", ie);
    } catch (ExecutionException ee) {
      throw new IOException("Regular upload failed", ee.getCause());
    }
  }

  /**
   * Tracks an in-flight multi-part upload: submits parts asynchronously,
   * waits for all part ETags, and completes or aborts the upload.
   */
  private class MultiPartUpload {
    private final String uploadId;
    private final List<ListenableFuture<PartETag>> partETagsFutures;

    public MultiPartUpload(String uploadId) {
      this.uploadId = uploadId;
      this.partETagsFutures = new ArrayList<ListenableFuture<PartETag>>();
      if (LOG.isDebugEnabled()) {
        LOG.debug("Initiated multi-part upload for bucket '{}' key '{}' with " +
            "id '{}'", bucket, key, uploadId);
      }
    }

    /**
     * Submit one part for asynchronous upload; part numbers are assigned
     * in submission order (S3 part numbers start at 1).
     */
    public void uploadPartAsync(ByteArrayInputStream inputStream,
        int partSize) {
      final int currentPartNumber = partETagsFutures.size() + 1;
      final UploadPartRequest request =
          new UploadPartRequest().withBucketName(bucket).withKey(key)
              .withUploadId(uploadId).withInputStream(inputStream)
              .withPartNumber(currentPartNumber).withPartSize(partSize);
      request.setGeneralProgressListener(progressListener);
      ListenableFuture<PartETag> partETagFuture =
          executorService.submit(new Callable<PartETag>() {
            @Override
            public PartETag call() throws Exception {
              if (LOG.isDebugEnabled()) {
                LOG.debug("Uploading part {} for id '{}'", currentPartNumber,
                    uploadId);
              }
              return client.uploadPart(request).getPartETag();
            }
          });
      partETagsFutures.add(partETagFuture);
    }

    /**
     * Block until every submitted part has uploaded.
     * @return the part ETags, in part-number order; never null
     * @throws IOException if any part fails (the upload is aborted first)
     * or the wait is interrupted
     */
    public List<PartETag> waitForAllPartUploads() throws IOException {
      try {
        return Futures.allAsList(partETagsFutures).get();
      } catch (InterruptedException ie) {
        LOG.warn("Interrupted partUpload:" + ie, ie);
        Thread.currentThread().interrupt();
        // Must not fall through and return null: the caller would pass
        // null ETags to complete() and fail with an NPE.
        throw new IOException("Interrupted while waiting for part uploads " +
            "in multi-part upload with id '" + uploadId + "'", ie);
      } catch (ExecutionException ee) {
        //there is no way of recovering so abort
        //cancel all partUploads
        for (ListenableFuture<PartETag> future : partETagsFutures) {
          future.cancel(true);
        }
        //abort multipartupload
        this.abort();
        throw new IOException("Part upload failed in multi-part upload with " +
            "id '" +uploadId + "':" + ee, ee);
      }
    }

    /** Complete the multi-part upload with the collected part ETags. */
    public void complete(List<PartETag> partETags) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Completing multi-part upload for key '{}', id '{}'", key,
            uploadId);
      }
      final CompleteMultipartUploadRequest completeRequest =
          new CompleteMultipartUploadRequest(bucket, key, uploadId, partETags);
      client.completeMultipartUpload(completeRequest);
    }

    /**
     * Best-effort abort of the multi-part upload; failures are logged but
     * not rethrown, as abort is only invoked on an already-failing path.
     */
    public void abort() {
      LOG.warn("Aborting multi-part upload with id '{}'", uploadId);
      try {
        client.abortMultipartUpload(new AbortMultipartUploadRequest(bucket,
            key, uploadId));
      } catch (Exception e2) {
        LOG.warn("Unable to abort multipart upload, you may need to purge " +
            "uploaded parts: " + e2, e2);
      }
    }
  }

  /**
   * Bridges AWS SDK progress events to the Hadoop Progressable supplied by
   * the caller, preventing task timeouts during long uploads.
   */
  private static class ProgressableListener implements ProgressListener {
    private final Progressable progress;

    public ProgressableListener(Progressable progress) {
      this.progress = progress;
    }

    @Override
    public void progressChanged(ProgressEvent progressEvent) {
      if (progress != null) {
        progress.progress();
      }
    }
  }
}

View File

@ -88,7 +88,8 @@ public class S3AFileSystem extends FileSystem {
private int maxKeys;
private long partSize;
private TransferManager transfers;
private int partSizeThreshold;
private ThreadPoolExecutor threadPoolExecutor;
private int multiPartThreshold;
public static final Logger LOG = LoggerFactory.getLogger(S3AFileSystem.class);
private CannedAccessControlList cannedACL;
private String serverSideEncryptionAlgorithm;
@ -237,7 +238,7 @@ public void initialize(URI name, Configuration conf) throws IOException {
maxKeys = conf.getInt(MAX_PAGING_KEYS, DEFAULT_MAX_PAGING_KEYS);
partSize = conf.getLong(MULTIPART_SIZE, DEFAULT_MULTIPART_SIZE);
partSizeThreshold = conf.getInt(MIN_MULTIPART_THRESHOLD,
multiPartThreshold = conf.getInt(MIN_MULTIPART_THRESHOLD,
DEFAULT_MIN_MULTIPART_THRESHOLD);
if (partSize < 5 * 1024 * 1024) {
@ -245,9 +246,9 @@ public void initialize(URI name, Configuration conf) throws IOException {
partSize = 5 * 1024 * 1024;
}
if (partSizeThreshold < 5 * 1024 * 1024) {
if (multiPartThreshold < 5 * 1024 * 1024) {
LOG.error(MIN_MULTIPART_THRESHOLD + " must be at least 5 MB");
partSizeThreshold = 5 * 1024 * 1024;
multiPartThreshold = 5 * 1024 * 1024;
}
int maxThreads = conf.getInt(MAX_THREADS, DEFAULT_MAX_THREADS);
@ -262,20 +263,20 @@ public void initialize(URI name, Configuration conf) throws IOException {
LinkedBlockingQueue<Runnable> workQueue =
new LinkedBlockingQueue<>(maxThreads *
conf.getInt(MAX_TOTAL_TASKS, DEFAULT_MAX_TOTAL_TASKS));
ThreadPoolExecutor tpe = new ThreadPoolExecutor(
threadPoolExecutor = new ThreadPoolExecutor(
coreThreads,
maxThreads,
keepAliveTime,
TimeUnit.SECONDS,
workQueue,
newDaemonThreadFactory("s3a-transfer-shared-"));
tpe.allowCoreThreadTimeOut(true);
threadPoolExecutor.allowCoreThreadTimeOut(true);
TransferManagerConfiguration transferConfiguration = new TransferManagerConfiguration();
transferConfiguration.setMinimumUploadPartSize(partSize);
transferConfiguration.setMultipartUploadThreshold(partSizeThreshold);
transferConfiguration.setMultipartUploadThreshold(multiPartThreshold);
transfers = new TransferManager(s3, tpe);
transfers = new TransferManager(s3, threadPoolExecutor);
transfers.setConfiguration(transferConfiguration);
String cannedACLName = conf.get(CANNED_ACL, DEFAULT_CANNED_ACL);
@ -391,7 +392,12 @@ public FSDataOutputStream create(Path f, FsPermission permission, boolean overwr
if (!overwrite && exists(f)) {
throw new FileAlreadyExistsException(f + " already exists");
}
if (getConf().getBoolean(FAST_UPLOAD, DEFAULT_FAST_UPLOAD)) {
return new FSDataOutputStream(new S3AFastOutputStream(s3, this, bucket,
key, progress, statistics, cannedACL,
serverSideEncryptionAlgorithm, partSize, (long)multiPartThreshold,
threadPoolExecutor), statistics);
}
// We pass null to FSDataOutputStream so it won't count writes that are being buffered to a file
return new FSDataOutputStream(new S3AOutputStream(getConf(), transfers, this,
bucket, key, progress, cannedACL, statistics,

View File

@ -213,13 +213,13 @@ If you do any of these: change your credentials immediately!
<property>
<name>fs.s3a.connection.establish.timeout</name>
<value>5000</value>
<description>Socket connection setup timeout in seconds.</description>
<description>Socket connection setup timeout in milliseconds.</description>
</property>
<property>
<name>fs.s3a.connection.timeout</name>
<value>50000</value>
<description>Socket connection timeout in seconds.</description>
<description>Socket connection timeout in milliseconds.</description>
</property>
<property>
@ -292,7 +292,7 @@ If you do any of these: change your credentials immediately!
<name>fs.s3a.buffer.dir</name>
<value>${hadoop.tmp.dir}/s3a</value>
<description>Comma separated list of directories that will be used to buffer file
uploads to.</description>
uploads to. No effect if fs.s3a.fast.upload is true.</description>
</property>
<property>
@ -301,6 +301,40 @@ If you do any of these: change your credentials immediately!
<description>The implementation class of the S3A Filesystem</description>
</property>
### S3AFastOutputStream
**Warning: NEW in hadoop 2.7. UNSTABLE, EXPERIMENTAL: use at own risk**
<property>
<name>fs.s3a.fast.upload</name>
<value>false</value>
<description>Upload directly from memory instead of buffering to
disk first. Memory usage and parallelism can be controlled as up to
fs.s3a.multipart.size memory is consumed for each (part)upload actively
uploading (fs.s3a.threads.max) or queueing (fs.s3a.max.total.tasks)</description>
</property>
<property>
<name>fs.s3a.fast.buffer.size</name>
<value>1048576</value>
<description>Size (in bytes) of initial memory buffer allocated for an
upload. No effect if fs.s3a.fast.upload is false.</description>
</property>
Writes are buffered in memory instead of to a file on local disk. This
removes the throughput bottleneck of the local disk write and read cycle
before starting the actual upload. Furthermore, it allows handling files that
are larger than the remaining local disk space.
However, non-trivial memory tuning is needed for optimal results and careless
settings could cause memory overflow. Up to `fs.s3a.threads.max` parallel
(part)uploads are active. Furthermore, up to `fs.s3a.max.total.tasks`
additional (part)uploads can be waiting (and thus memory buffers are created).
The memory buffer is uploaded as a single upload if it is not larger than
`fs.s3a.multipart.threshold`. Otherwise, a multi-part upload is initiated and
parts of size `fs.s3a.multipart.size` are used to protect against overflowing
the available memory. These settings should be tuned to the envisioned
workflow (some large files, many small ones, ...) and the physical
limitations of the machine and cluster (memory, network bandwidth).
## Testing the S3 filesystem clients
@ -334,7 +368,7 @@ each filesystem for its testing.
The contents of each bucket will be destroyed during the test process:
do not use the bucket for any purpose other than testing. Furthermore, for
s3a, all in-progress multi-part uploads to the bucket will be aborted at the
start of a test (by forcing fs.s3a.multipart.purge=true) to clean up the
start of a test (by forcing `fs.s3a.multipart.purge=true`) to clean up the
temporary state of previously failed tests.
Example:
@ -392,14 +426,14 @@ Example:
## File `contract-test-options.xml`
The file `hadoop-tools/hadoop-aws/src/test/resources/contract-test-options.xml`
must be created and configured for the test fileystems.
must be created and configured for the test filesystems.
If a specific file `fs.contract.test.fs.*` test path is not defined for
any of the filesystems, those tests will be skipped.
The standard S3 authentication details must also be provided. This can be
through copy-and-paste of the `auth-keys.xml` credentials, or it can be
through direct XInclude inclustion.
through direct XInclude inclusion.
#### s3://

View File

@ -0,0 +1,74 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.Timeout;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
/**
 * Tests regular and multi-part upload functionality for S3AFastOutputStream.
 * File sizes are kept small to reduce test duration on slow connections.
 * Requires valid S3 test credentials (see S3ATestUtils); the test path is
 * cleaned up after each case.
 */
public class TestS3AFastOutputStream {
  private FileSystem fs;

  // generous timeout: uploads run against a real S3 endpoint
  @Rule
  public Timeout testTimeout = new Timeout(30 * 60 * 1000);

  @Before
  public void setUp() throws Exception {
    Configuration conf = new Configuration();
    // force the smallest legal threshold/part size (5MB) so the multi-part
    // path is exercised by a 6MB file; use setInt to match the getInt reads
    // in S3AFileSystem.initialize()
    conf.setInt(Constants.MIN_MULTIPART_THRESHOLD, 5 * 1024 * 1024);
    conf.setInt(Constants.MULTIPART_SIZE, 5 * 1024 * 1024);
    conf.setBoolean(Constants.FAST_UPLOAD, true);
    fs = S3ATestUtils.createTestFileSystem(conf);
  }

  @After
  public void tearDown() throws Exception {
    if (fs != null) {
      fs.delete(getTestPath(), true);
    }
  }

  /** Path under which all test files are created (and recursively deleted). */
  protected Path getTestPath() {
    return new Path("/tests3a");
  }

  /** 1MB file: below the threshold, exercises the single-PUT path. */
  @Test
  public void testRegularUpload() throws IOException {
    ContractTestUtils.createAndVerifyFile(fs, getTestPath(), 1024 * 1024);
  }

  /** 6MB file: above the 5MB threshold, exercises the multi-part path. */
  @Test
  public void testMultiPartUpload() throws IOException {
    ContractTestUtils.createAndVerifyFile(fs, getTestPath(), 6 * 1024 *
        1024);
  }
}