diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStreamBuilder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStreamBuilder.java
index 1f668eb677..86c284a9e8 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStreamBuilder.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStreamBuilder.java
@@ -54,16 +54,29 @@
* options accordingly, for example:
*
*
- * FSDataOutputStreamBuilder builder = fs.createFile(path);
- * builder.permission(perm)
+ *
+ * // Don't
+ * if (fs instanceof FooFileSystem) {
+ * FooFileSystem foofs = (FooFileSystem) fs;
+ * OutputStream out = foofs.createFile(path)
+ * .optionA()
+ * .optionB("value")
+ * .cache()
+ * .build();
+ * } else if (fs instanceof BarFileSystem) {
+ * ...
+ * }
+ *
+ * // Do
+ * OutputStream out = fs.createFile(path)
+ * .permission(perm)
* .bufferSize(bufSize)
- * .opt("dfs.outputstream.builder.lazy-persist", true)
- * .opt("dfs.outputstream.builder.ec.policy-name", "rs-3-2-64k")
- * .opt("fs.local.o-direct", true)
- * .must("fs.s3a.fast-upload", true)
- * .must("fs.azure.buffer-size", 256 * 1024 * 1024);
- * FSDataOutputStream out = builder.build();
- * ...
+ * .opt("foofs:option.a", true)
+ * .opt("foofs:option.b", "value")
+ * .opt("barfs:cache", true)
+ * .must("foofs:cache", true)
+ * .must("barfs:cache-size", 256 * 1024 * 1024)
+ * .build();
*
*
* If the option is not related to the file system, the option will be ignored.
@@ -263,6 +276,8 @@ public B opt(@Nonnull final String key, @Nonnull final String value) {
/**
* Set optional boolean parameter for the Builder.
+ *
+ * @see #opt(String, String)
*/
public B opt(@Nonnull final String key, boolean value) {
mandatoryKeys.remove(key);
@@ -272,6 +287,8 @@ public B opt(@Nonnull final String key, boolean value) {
/**
* Set optional int parameter for the Builder.
+ *
+ * @see #opt(String, String)
*/
public B opt(@Nonnull final String key, int value) {
mandatoryKeys.remove(key);
@@ -281,6 +298,8 @@ public B opt(@Nonnull final String key, int value) {
/**
* Set optional float parameter for the Builder.
+ *
+ * @see #opt(String, String)
*/
public B opt(@Nonnull final String key, float value) {
mandatoryKeys.remove(key);
@@ -290,6 +309,8 @@ public B opt(@Nonnull final String key, float value) {
/**
* Set optional double parameter for the Builder.
+ *
+ * @see #opt(String, String)
*/
public B opt(@Nonnull final String key, double value) {
mandatoryKeys.remove(key);
@@ -299,6 +320,8 @@ public B opt(@Nonnull final String key, double value) {
/**
* Set an array of string values as optional parameter for the Builder.
+ *
+ * @see #opt(String, String)
*/
public B opt(@Nonnull final String key, @Nonnull final String... values) {
mandatoryKeys.remove(key);
@@ -310,8 +333,7 @@ public B opt(@Nonnull final String key, @Nonnull final String... values) {
* Set mandatory option to the Builder.
*
* If the option is not supported or unavailable on the {@link FileSystem},
- * the client should expect {@link #build()} throws
- * {@link IllegalArgumentException}.
+ * the client should expect {@link #build()} throws IllegalArgumentException.
*/
public B must(@Nonnull final String key, @Nonnull final String value) {
mandatoryKeys.add(key);
@@ -319,35 +341,55 @@ public B must(@Nonnull final String key, @Nonnull final String value) {
return getThisBuilder();
}
- /** Set mandatory boolean option. */
+ /**
+ * Set mandatory boolean option.
+ *
+ * @see #must(String, String)
+ */
public B must(@Nonnull final String key, boolean value) {
mandatoryKeys.add(key);
options.setBoolean(key, value);
return getThisBuilder();
}
- /** Set mandatory int option. */
+ /**
+ * Set mandatory int option.
+ *
+ * @see #must(String, String)
+ */
public B must(@Nonnull final String key, int value) {
mandatoryKeys.add(key);
options.setInt(key, value);
return getThisBuilder();
}
- /** Set mandatory float option. */
+ /**
+ * Set mandatory float option.
+ *
+ * @see #must(String, String)
+ */
public B must(@Nonnull final String key, float value) {
mandatoryKeys.add(key);
options.setFloat(key, value);
return getThisBuilder();
}
- /** Set mandatory double option. */
+ /**
+ * Set mandatory double option.
+ *
+ * @see #must(String, String)
+ */
public B must(@Nonnull final String key, double value) {
mandatoryKeys.add(key);
options.setDouble(key, value);
return getThisBuilder();
}
- /** Set a string array as mandatory option. */
+ /**
+ * Set a string array as mandatory option.
+ *
+ * @see #must(String, String)
+ */
public B must(@Nonnull final String key, @Nonnull final String... values) {
mandatoryKeys.add(key);
options.setStrings(key, values);
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md
index d7e57cef3e..1e522c7782 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md
@@ -553,7 +553,7 @@ on a path that exists and is a file. Instead the operation returns false.
FS' = FS
result = False
-### `FSDataOutputStream create(Path, ...)`
+### `FSDataOutputStream create(Path, ...)`
FSDataOutputStream create(Path p,
@@ -616,7 +616,24 @@ this precondition fails.
* Not covered: symlinks. The resolved path of the symlink is used as the final path argument to the `create()` operation
-### `FSDataOutputStream append(Path p, int bufferSize, Progressable progress)`
+### `FSDataOutputStreamBuilder createFile(Path p)`
+
+Make a `FSDataOutputStreamBuilder` to specify the parameters to create a file.
+
+#### Implementation Notes
+
+`createFile(p)` only returns a `FSDataOutputStreamBuilder`; it does not change
+the filesystem immediately. When `build()` is invoked on the `FSDataOutputStreamBuilder`,
+the builder parameters are verified and [`create(Path p)`](#FileSystem.create)
+is invoked on the underlying filesystem. `build()` has the same preconditions
+and postconditions as [`create(Path p)`](#FileSystem.create).
+
+* Similar to [`create(Path p)`](#FileSystem.create), files are overwritten
+by default, unless `builder.overwrite(false)` is specified.
+* Unlike [`create(Path p)`](#FileSystem.create), missing parent directories are
+not created by default, unless `builder.recursive()` is specified.
+
+### `FSDataOutputStream append(Path p, int bufferSize, Progressable progress)`
Implementations without a compliant call SHOULD throw `UnsupportedOperationException`.
@@ -634,6 +651,18 @@ Implementations without a compliant call SHOULD throw `UnsupportedOperationExcep
Return: `FSDataOutputStream`, which can update the entry `FS.Files[p]`
by appending data to the existing list.
+### `FSDataOutputStreamBuilder appendFile(Path p)`
+
+Make a `FSDataOutputStreamBuilder` to specify the parameters to append to an
+existing file.
+
+#### Implementation Notes
+
+`appendFile(p)` only returns a `FSDataOutputStreamBuilder`; it does not change
+the filesystem immediately. When `build()` is invoked on the `FSDataOutputStreamBuilder`,
+the builder parameters are verified and [`append()`](#FileSystem.append) is
+invoked on the underlying filesystem. `build()` has the same preconditions and
+postconditions as [`append()`](#FileSystem.append).
### `FSDataInputStream open(Path f, int bufferSize)`
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md
new file mode 100644
index 0000000000..4ea1fd168f
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md
@@ -0,0 +1,182 @@
+
+
+
+
+
+
+# class `org.apache.hadoop.fs.FSDataOutputStreamBuilder`
+
+
+
+Builder pattern for `FSDataOutputStream` and its subclasses. It is used to
+create a new file or open an existing file on `FileSystem` for write.
+
+## Invariants
+
+The `FSDataOutputStreamBuilder` interface does not validate parameters
+or modify the state of `FileSystem` until [`build()`](#Builder.build) is
+invoked.
+
+## Implementation-agnostic parameters.
+
+### `FSDataOutputStreamBuilder create()`
+
+Specify `FSDataOutputStreamBuilder` to create a file on `FileSystem`, equivalent
+to `CreateFlag#CREATE`.
+
+### `FSDataOutputStreamBuilder append()`
+
+Specify `FSDataOutputStreamBuilder` to append to an existing file on
+`FileSystem`, equivalent to `CreateFlag#APPEND`.
+
+### `FSDataOutputStreamBuilder overwrite(boolean overwrite)`
+
+Specify `FSDataOutputStreamBuilder` to overwrite an existing file or not. If
+`overwrite==true`, an existing file is truncated, equivalent to
+`CreateFlag#OVERWRITE`.
+
+### `FSDataOutputStreamBuilder permission(FsPermission permission)`
+
+Set permission for the file.
+
+### `FSDataOutputStreamBuilder bufferSize(int bufSize)`
+
+Set the size of the buffer to be used.
+
+### `FSDataOutputStreamBuilder replication(short replica)`
+
+Set the replication factor.
+
+### `FSDataOutputStreamBuilder blockSize(long size)`
+
+Set block size in bytes.
+
+### `FSDataOutputStreamBuilder recursive()`
+
+Create parent directories if they do not exist.
+
+### `FSDataOutputStreamBuilder progress(Progressable prog)`
+
+Set the facility of reporting progress.
+
+### `FSDataOutputStreamBuilder checksumOpt(ChecksumOpt chksumOpt)`
+
+Set checksum opt.
+
+### Set optional or mandatory parameters
+
+ FSDataOutputStreamBuilder opt(String key, ...)
+ FSDataOutputStreamBuilder must(String key, ...)
+
+Set optional or mandatory parameters to the builder. Using `opt()` or `must()`,
+client can specify FS-specific parameters without inspecting the concrete type
+of `FileSystem`.
+
+ // Don't
+ if (fs instanceof FooFileSystem) {
+ FooFileSystem foofs = (FooFileSystem) fs;
+ out = foofs.createFile(path)
+ .optionA()
+ .optionB("value")
+ .cache()
+ .build();
+ } else if (fs instanceof BarFileSystem) {
+ ...
+ }
+
+ // Do
+ out = fs.createFile(path)
+ .permission(perm)
+ .bufferSize(bufSize)
+ .opt("foofs:option.a", true)
+ .opt("foofs:option.b", "value")
+ .opt("barfs:cache", true)
+ .must("foofs:cache", true)
+ .must("barfs:cache-size", 256 * 1024 * 1024)
+ .build();
+
+#### Implementation Notes
+
+The concrete `FileSystem` and/or `FSDataOutputStreamBuilder` implementation
+MUST verify that implementation-agnostic parameters (e.g., "syncable") or
+implementation-specific parameters (e.g., "foofs:cache")
+are supported. `FileSystem` will satisfy optional parameters (via `opt(key, ...)`)
+on best effort. If the mandatory parameters (via `must(key, ...)`) can not be satisfied
+in the `FileSystem`, `IllegalArgumentException` should be thrown in `build()`.
+
+The behavior of resolving the conflicts between the parameters set by
+builder methods (e.g., `bufferSize()`) and `opt()`/`must()` is undefined.
+
+## HDFS-specific parameters.
+
+`HdfsDataOutputStreamBuilder extends FSDataOutputStreamBuilder` provides additional
+HDFS-specific parameters, to further customize file creation / append behavior.
+
+### `FSDataOutputStreamBuilder favoredNodes(InetSocketAddress[] nodes)`
+
+Set favored DataNodes for new blocks.
+
+### `FSDataOutputStreamBuilder syncBlock()`
+
+Force closed blocks to the disk device. See `CreateFlag#SYNC_BLOCK`
+
+### `FSDataOutputStreamBuilder lazyPersist()`
+
+Create the block on transient storage if possible.
+
+### `FSDataOutputStreamBuilder newBlock()`
+
+Append data to a new block instead of the end of the last partial block.
+
+### `FSDataOutputStreamBuilder noLocalWrite()`
+
+Advise that a block replica NOT be written to the local DataNode.
+
+### `FSDataOutputStreamBuilder ecPolicyName(String policyName)`
+
+Enforce the file to be a striped file with erasure coding policy 'policyName',
+no matter what its parent directory's replication or erasure coding policy is.
+
+### `FSDataOutputStreamBuilder replicate()`
+
+Enforce the file to be a replicated file, no matter what its parent directory's
+replication or erasure coding policy is.
+
+## Builder interface
+
+### `FSDataOutputStream build()`
+
+Create a new file or append to an existing file on the underlying `FileSystem`,
+and return `FSDataOutputStream` for write.
+
+#### Preconditions
+
+The following combinations of parameters are not supported:
+
+ if APPEND|OVERWRITE: raise HadoopIllegalArgumentException
+ if CREATE|APPEND|OVERWRITE: raise HadoopIllegalArgumentException
+
+`FileSystem` may reject the request for other reasons and throw `IOException`,
+see `FileSystem#create(path, ...)` and `FileSystem#append()`.
+
+#### Postconditions
+
+ FS' where :
+ FS'.Files'[p] == []
+ ancestors(p) is-subset-of FS'.Directories'
+
+ result = FSDataOutputStream
+
+The result is `FSDataOutputStream` to be used to write data to filesystem.
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md
index 66a7eb3f36..532b6c7b68 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md
@@ -33,5 +33,6 @@ HDFS as these are commonly expected by Hadoop client applications.
1. [Model](model.html)
1. [FileSystem class](filesystem.html)
1. [FSDataInputStream class](fsdatainputstream.html)
+1. [FSDataOutputStreamBuilder class](fsdataoutputstreambuilder.html)
2. [Testing with the Filesystem specification](testing.html)
2. [Extending the specification and its tests](extending.html)