diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStreamBuilder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStreamBuilder.java index 1f668eb677..86c284a9e8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStreamBuilder.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStreamBuilder.java @@ -54,16 +54,29 @@ * options accordingly, for example: * * - * FSDataOutputStreamBuilder builder = fs.createFile(path); - * builder.permission(perm) + * + * // Don't + * if (fs instanceof FooFileSystem) { + * FooFileSystem foofs = (FooFileSystem) fs; + * OutputStream out = foofs.createFile(path) + * .optionA() + * .optionB("value") + * .cache() + * .build(); + * } else if (fs instanceof BarFileSystem) { + * ... + * } + * + * // Do + * OutputStream out = fs.createFile(path) + * .permission(perm) * .bufferSize(bufSize) - * .opt("dfs.outputstream.builder.lazy-persist", true) - * .opt("dfs.outputstream.builder.ec.policy-name", "rs-3-2-64k") - * .opt("fs.local.o-direct", true) - * .must("fs.s3a.fast-upload", true) - * .must("fs.azure.buffer-size", 256 * 1024 * 1024); - * FSDataOutputStream out = builder.build(); - * ... + * .opt("foofs:option.a", true) + * .opt("foofs:option.b", "value") + * .opt("barfs:cache", true) + * .must("foofs:cache", true) + * .must("barfs:cache-size", 256 * 1024 * 1024) + * .build(); * * * If the option is not related to the file system, the option will be ignored. @@ -263,6 +276,8 @@ public B opt(@Nonnull final String key, @Nonnull final String value) { /** * Set optional boolean parameter for the Builder. + * + * @see #opt(String, String) */ public B opt(@Nonnull final String key, boolean value) { mandatoryKeys.remove(key); @@ -272,6 +287,8 @@ public B opt(@Nonnull final String key, boolean value) { /** * Set optional int parameter for the Builder. 
+ * + * @see #opt(String, String) */ public B opt(@Nonnull final String key, int value) { mandatoryKeys.remove(key); @@ -281,6 +298,8 @@ public B opt(@Nonnull final String key, int value) { /** * Set optional float parameter for the Builder. + * + * @see #opt(String, String) */ public B opt(@Nonnull final String key, float value) { mandatoryKeys.remove(key); @@ -290,6 +309,8 @@ public B opt(@Nonnull final String key, float value) { /** * Set optional double parameter for the Builder. + * + * @see #opt(String, String) */ public B opt(@Nonnull final String key, double value) { mandatoryKeys.remove(key); @@ -299,6 +320,8 @@ public B opt(@Nonnull final String key, double value) { /** * Set an array of string values as optional parameter for the Builder. + * + * @see #opt(String, String) */ public B opt(@Nonnull final String key, @Nonnull final String... values) { mandatoryKeys.remove(key); @@ -310,8 +333,7 @@ public B opt(@Nonnull final String key, @Nonnull final String... values) { * Set mandatory option to the Builder. * * If the option is not supported or unavailable on the {@link FileSystem}, - * the client should expect {@link #build()} throws - * {@link IllegalArgumentException}. + * the client should expect {@link #build()} throws IllegalArgumentException. */ public B must(@Nonnull final String key, @Nonnull final String value) { mandatoryKeys.add(key); @@ -319,35 +341,55 @@ public B must(@Nonnull final String key, @Nonnull final String value) { return getThisBuilder(); } - /** Set mandatory boolean option. */ + /** + * Set mandatory boolean option. + * + * @see #must(String, String) + */ public B must(@Nonnull final String key, boolean value) { mandatoryKeys.add(key); options.setBoolean(key, value); return getThisBuilder(); } - /** Set mandatory int option. */ + /** + * Set mandatory int option. 
+ * + * @see #must(String, String) + */ public B must(@Nonnull final String key, int value) { mandatoryKeys.add(key); options.setInt(key, value); return getThisBuilder(); } - /** Set mandatory float option. */ + /** + * Set mandatory float option. + * + * @see #must(String, String) + */ public B must(@Nonnull final String key, float value) { mandatoryKeys.add(key); options.setFloat(key, value); return getThisBuilder(); } - /** Set mandatory double option. */ + /** + * Set mandatory double option. + * + * @see #must(String, String) + */ public B must(@Nonnull final String key, double value) { mandatoryKeys.add(key); options.setDouble(key, value); return getThisBuilder(); } - /** Set a string array as mandatory option. */ + /** + * Set a string array as mandatory option. + * + * @see #must(String, String) + */ public B must(@Nonnull final String key, @Nonnull final String... values) { mandatoryKeys.add(key); options.setStrings(key, values); diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md index d7e57cef3e..1e522c7782 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md @@ -553,7 +553,7 @@ on a path that exists and is a file. Instead the operation returns false. FS' = FS result = False -### `FSDataOutputStream create(Path, ...)` +### `FSDataOutputStream create(Path, ...)` FSDataOutputStream create(Path p, @@ -616,7 +616,24 @@ this precondition fails. * Not covered: symlinks. The resolved path of the symlink is used as the final path argument to the `create()` operation -### `FSDataOutputStream append(Path p, int bufferSize, Progressable progress)` +### `FSDataOutputStreamBuilder createFile(Path p)` + +Make a `FSDataOutputStreamBuilder` to specify the parameters to create a file. 
+ +#### Implementation Notes + +`createFile(p)` returns a `FSDataOutputStreamBuilder` only and does not make +changes to the filesystem immediately. When `build()` is invoked on the `FSDataOutputStreamBuilder`, +the builder parameters are verified and [`create(Path p)`](#FileSystem.create) +is invoked on the underlying filesystem. `build()` has the same preconditions +and postconditions as [`create(Path p)`](#FileSystem.create). + +* Similar to [`create(Path p)`](#FileSystem.create), files are overwritten +by default, unless `builder.overwrite(false)` is specified. +* Unlike [`create(Path p)`](#FileSystem.create), missing parent directories are +not created by default, unless `builder.recursive()` is specified. + +### `FSDataOutputStream append(Path p, int bufferSize, Progressable progress)` Implementations without a compliant call SHOULD throw `UnsupportedOperationException`. @@ -634,6 +651,18 @@ Implementations without a compliant call SHOULD throw `UnsupportedOperationExcep Return: `FSDataOutputStream`, which can update the entry `FS.Files[p]` by appending data to the existing list. +### `FSDataOutputStreamBuilder appendFile(Path p)` + +Make a `FSDataOutputStreamBuilder` to specify the parameters to append to an +existing file. + +#### Implementation Notes + +`appendFile(p)` returns a `FSDataOutputStreamBuilder` only and does not make +changes to the filesystem immediately. When `build()` is invoked on the `FSDataOutputStreamBuilder`, +the builder parameters are verified and [`append()`](#FileSystem.append) is +invoked on the underlying filesystem. `build()` has the same preconditions and +postconditions as [`append()`](#FileSystem.append). 
### `FSDataInputStream open(Path f, int bufferSize)` diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md new file mode 100644 index 0000000000..4ea1fd168f --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md @@ -0,0 +1,182 @@ + + + + + + +# class `org.apache.hadoop.fs.FSDataOutputStreamBuilder` + + + +Builder pattern for `FSDataOutputStream` and its subclasses. It is used to +create a new file or open an existing file on `FileSystem` for write. + +## Invariants + +The `FSDataOutputStreamBuilder` interface does not validate parameters +or modify the state of `FileSystem` until [`build()`](#Builder.build) is +invoked. + +## Implementation-agnostic parameters. + +### `FSDataOutputStreamBuilder create()` + +Specify `FSDataOutputStreamBuilder` to create a file on `FileSystem`, equivalent +to `CreateFlag#CREATE`. + +### `FSDataOutputStreamBuilder append()` + +Specify `FSDataOutputStreamBuilder` to append to an existing file on +`FileSystem`, equivalent to `CreateFlag#APPEND`. + +### `FSDataOutputStreamBuilder overwrite(boolean overwrite)` + +Specify `FSDataOutputStreamBuilder` to overwrite an existing file or not. If +`overwrite==true` is given, it truncates an existing file, equivalent to +`CreateFlag#OVERWRITE`. + +### `FSDataOutputStreamBuilder permission(FsPermission permission)` + +Set permission for the file. + +### `FSDataOutputStreamBuilder bufferSize(int bufSize)` + +Set the size of the buffer to be used. + +### `FSDataOutputStreamBuilder replication(short replica)` + +Set the replication factor. + +### `FSDataOutputStreamBuilder blockSize(long size)` + +Set block size in bytes. + +### `FSDataOutputStreamBuilder recursive()` + +Create parent directories if they do not exist. 
+ +### `FSDataOutputStreamBuilder progress(Progressable prog)` + +Set the facility of reporting progress. + +### `FSDataOutputStreamBuilder checksumOpt(ChecksumOpt chksumOpt)` + +Set checksum opt. + +### Set optional or mandatory parameters + + FSDataOutputStreamBuilder opt(String key, ...) + FSDataOutputStreamBuilder must(String key, ...) + +Set optional or mandatory parameters to the builder. Using `opt()` or `must()`, +clients can specify FS-specific parameters without inspecting the concrete type +of `FileSystem`. + + // Don't + if (fs instanceof FooFileSystem) { + FooFileSystem foofs = (FooFileSystem) fs; + out = foofs.createFile(path) + .optionA() + .optionB("value") + .cache() + .build(); + } else if (fs instanceof BarFileSystem) { + ... + } + + // Do + out = fs.createFile(path) + .permission(perm) + .bufferSize(bufSize) + .opt("foofs:option.a", true) + .opt("foofs:option.b", "value") + .opt("barfs:cache", true) + .must("foofs:cache", true) + .must("barfs:cache-size", 256 * 1024 * 1024) + .build(); + +#### Implementation Notes + +The concrete `FileSystem` and/or `FSDataOutputStreamBuilder` implementation +MUST verify that implementation-agnostic parameters (e.g., "syncable") or +implementation-specific parameters (e.g., "foofs:cache") +are supported. `FileSystem` will satisfy optional parameters (via `opt(key, ...)`) +on a best-effort basis. If the mandatory parameters (via `must(key, ...)`) cannot be satisfied +in the `FileSystem`, `IllegalArgumentException` should be thrown in `build()`. + +The behavior of resolving the conflicts between the parameters set by +builder methods (e.g., `bufferSize()`) and `opt()`/`must()` is undefined. + +## HDFS-specific parameters. + +`HdfsDataOutputStreamBuilder extends FSDataOutputStreamBuilder` provides additional +HDFS-specific parameters, to further customize file creation / append behavior. + +### `FSDataOutputStreamBuilder favoredNodes(InetSocketAddress[] nodes)` + +Set favored DataNodes for new blocks. 
+ +### `FSDataOutputStreamBuilder syncBlock()` + +Force closed blocks to the disk device. See `CreateFlag#SYNC_BLOCK`. + +### `FSDataOutputStreamBuilder lazyPersist()` + +Create the block on transient storage if possible. + +### `FSDataOutputStreamBuilder newBlock()` + +Append data to a new block instead of the end of the last partial block. + +### `FSDataOutputStreamBuilder noLocalWrite()` + +Advise that a block replica NOT be written to the local DataNode. + +### `FSDataOutputStreamBuilder ecPolicyName(String policyName)` + +Enforce the file to be a striped file with erasure coding policy 'policyName', +no matter what its parent directory's replication or erasure coding policy is. + +### `FSDataOutputStreamBuilder replicate()` + +Enforce the file to be a replicated file, no matter what its parent directory's +replication or erasure coding policy is. + +## Builder interface + +### `FSDataOutputStream build()` + +Create a new file or append to an existing file on the underlying `FileSystem`, +and return `FSDataOutputStream` for writing. + +#### Preconditions + +The following combinations of parameters are not supported: + + if APPEND|OVERWRITE: raise HadoopIllegalArgumentException + if CREATE|APPEND|OVERWRITE: raise HadoopIllegalArgumentException + +`FileSystem` may reject the request for other reasons and throw `IOException`, +see `FileSystem#create(path, ...)` and `FileSystem#append()`. + +#### Postconditions + + FS' where : + FS'.Files'[p] == [] + ancestors(p) is-subset-of FS'.Directories' + + result = FSDataOutputStream + +The result is `FSDataOutputStream` to be used to write data to the filesystem. 
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md index 66a7eb3f36..532b6c7b68 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md @@ -33,5 +33,6 @@ HDFS as these are commonly expected by Hadoop client applications. 1. [Model](model.html) 1. [FileSystem class](filesystem.html) 1. [FSDataInputStream class](fsdatainputstream.html) +1. [FSDataOutputStreamBuilder class](fsdataoutputstreambuilder.html) 2. [Testing with the Filesystem specification](testing.html) 2. [Extending the specification and its tests](extending.html)