HADOOP-14398. Modify documents for the FileSystem Builder API. (Lei (Eddy) Xu)

This commit is contained in:
Lei Xu 2017-08-17 18:06:23 -07:00
parent 4230872dd6
commit 99e558b13b
4 changed files with 272 additions and 18 deletions

View File

@ -54,16 +54,29 @@
* options accordingly, for example: * options accordingly, for example:
* *
* <code> * <code>
* FSDataOutputStreamBuilder builder = fs.createFile(path); *
* builder.permission(perm) * // Don't
* .bufferSize(bufSize) * if (fs instanceof FooFileSystem) {
* .opt("dfs.outputstream.builder.lazy-persist", true) * FooFileSystem foofs = (FooFileSystem) fs;
* .opt("dfs.outputstream.builder.ec.policy-name", "rs-3-2-64k") * OutputStream out = foofs.createFile(path)
* .opt("fs.local.o-direct", true) * .optionA()
* .must("fs.s3a.fast-upload", true) * .optionB("value")
* .must("fs.azure.buffer-size", 256 * 1024 * 1024); * .cache()
* FSDataOutputStream out = builder.build(); * .build()
* } else if (fs instanceof BarFileSystem) {
* ... * ...
* }
*
* // Do
* OutputStream out = fs.createFile(path)
* .permission(perm)
* .bufferSize(bufSize)
* .opt("foofs:option.a", true)
* .opt("foofs:option.b", "value")
* .opt("barfs:cache", true)
* .must("foofs:cache", true)
* .must("barfs:cache-size", 256 * 1024 * 1024)
* .build();
* </code> * </code>
* *
* If the option is not related to the file system, the option will be ignored. * If the option is not related to the file system, the option will be ignored.
@ -263,6 +276,8 @@ public B opt(@Nonnull final String key, @Nonnull final String value) {
/** /**
* Set optional boolean parameter for the Builder. * Set optional boolean parameter for the Builder.
*
* @see #opt(String, String)
*/ */
public B opt(@Nonnull final String key, boolean value) { public B opt(@Nonnull final String key, boolean value) {
mandatoryKeys.remove(key); mandatoryKeys.remove(key);
@ -272,6 +287,8 @@ public B opt(@Nonnull final String key, boolean value) {
/** /**
* Set optional int parameter for the Builder. * Set optional int parameter for the Builder.
*
* @see #opt(String, String)
*/ */
public B opt(@Nonnull final String key, int value) { public B opt(@Nonnull final String key, int value) {
mandatoryKeys.remove(key); mandatoryKeys.remove(key);
@ -281,6 +298,8 @@ public B opt(@Nonnull final String key, int value) {
/** /**
* Set optional float parameter for the Builder. * Set optional float parameter for the Builder.
*
* @see #opt(String, String)
*/ */
public B opt(@Nonnull final String key, float value) { public B opt(@Nonnull final String key, float value) {
mandatoryKeys.remove(key); mandatoryKeys.remove(key);
@ -290,6 +309,8 @@ public B opt(@Nonnull final String key, float value) {
/** /**
* Set optional double parameter for the Builder. * Set optional double parameter for the Builder.
*
* @see #opt(String, String)
*/ */
public B opt(@Nonnull final String key, double value) { public B opt(@Nonnull final String key, double value) {
mandatoryKeys.remove(key); mandatoryKeys.remove(key);
@ -299,6 +320,8 @@ public B opt(@Nonnull final String key, double value) {
/** /**
* Set an array of string values as optional parameter for the Builder. * Set an array of string values as optional parameter for the Builder.
*
* @see #opt(String, String)
*/ */
public B opt(@Nonnull final String key, @Nonnull final String... values) { public B opt(@Nonnull final String key, @Nonnull final String... values) {
mandatoryKeys.remove(key); mandatoryKeys.remove(key);
@ -310,8 +333,7 @@ public B opt(@Nonnull final String key, @Nonnull final String... values) {
* Set mandatory option to the Builder. * Set mandatory option to the Builder.
* *
* If the option is not supported or unavailable on the {@link FileSystem}, * If the option is not supported or unavailable on the {@link FileSystem},
* the client should expect {@link #build()} throws * the client should expect {@link #build()} throws IllegalArgumentException.
* {@link IllegalArgumentException}.
*/ */
public B must(@Nonnull final String key, @Nonnull final String value) { public B must(@Nonnull final String key, @Nonnull final String value) {
mandatoryKeys.add(key); mandatoryKeys.add(key);
@ -319,35 +341,55 @@ public B must(@Nonnull final String key, @Nonnull final String value) {
return getThisBuilder(); return getThisBuilder();
} }
/** Set mandatory boolean option. */ /**
* Set mandatory boolean option.
*
* @see #must(String, String)
*/
public B must(@Nonnull final String key, boolean value) { public B must(@Nonnull final String key, boolean value) {
mandatoryKeys.add(key); mandatoryKeys.add(key);
options.setBoolean(key, value); options.setBoolean(key, value);
return getThisBuilder(); return getThisBuilder();
} }
/** Set mandatory int option. */ /**
* Set mandatory int option.
*
* @see #must(String, String)
*/
public B must(@Nonnull final String key, int value) { public B must(@Nonnull final String key, int value) {
mandatoryKeys.add(key); mandatoryKeys.add(key);
options.setInt(key, value); options.setInt(key, value);
return getThisBuilder(); return getThisBuilder();
} }
/** Set mandatory float option. */ /**
* Set mandatory float option.
*
* @see #must(String, String)
*/
public B must(@Nonnull final String key, float value) { public B must(@Nonnull final String key, float value) {
mandatoryKeys.add(key); mandatoryKeys.add(key);
options.setFloat(key, value); options.setFloat(key, value);
return getThisBuilder(); return getThisBuilder();
} }
/** Set mandatory double option. */ /**
* Set mandatory double option.
*
* @see #must(String, String)
*/
public B must(@Nonnull final String key, double value) { public B must(@Nonnull final String key, double value) {
mandatoryKeys.add(key); mandatoryKeys.add(key);
options.setDouble(key, value); options.setDouble(key, value);
return getThisBuilder(); return getThisBuilder();
} }
/** Set a string array as mandatory option. */ /**
* Set a string array as mandatory option.
*
* @see #must(String, String)
*/
public B must(@Nonnull final String key, @Nonnull final String... values) { public B must(@Nonnull final String key, @Nonnull final String... values) {
mandatoryKeys.add(key); mandatoryKeys.add(key);
options.setStrings(key, values); options.setStrings(key, values);

View File

@ -553,7 +553,7 @@ on a path that exists and is a file. Instead the operation returns false.
FS' = FS FS' = FS
result = False result = False
### `FSDataOutputStream create(Path, ...)` ### <a name='FileSystem.create'></a> `FSDataOutputStream create(Path, ...)`
FSDataOutputStream create(Path p, FSDataOutputStream create(Path p,
@ -616,7 +616,24 @@ this precondition fails.
* Not covered: symlinks. The resolved path of the symlink is used as the final path argument to the `create()` operation * Not covered: symlinks. The resolved path of the symlink is used as the final path argument to the `create()` operation
### `FSDataOutputStream append(Path p, int bufferSize, Progressable progress)` ### `FSDataOutputStreamBuilder createFile(Path p)`
Make a `FSDataOutputStreamBuilder` to specify the parameters to create a file.
#### Implementation Notes
`createFile(p)` only returns a `FSDataOutputStreamBuilder`; it does not make any
change to the filesystem immediately. When `build()` is invoked on the `FSDataOutputStreamBuilder`,
the builder parameters are verified and [`create(Path p)`](#FileSystem.create)
is invoked on the underlying filesystem. `build()` has the same preconditions
and postconditions as [`create(Path p)`](#FileSystem.create).
* Similar to [`create(Path p)`](#FileSystem.create), files are overwritten
by default, unless `builder.overwrite(false)` is specified.
* Unlike [`create(Path p)`](#FileSystem.create), missing parent directories are
not created by default, unless `builder.recursive()` is specified.
### <a name='FileSystem.append'></a> `FSDataOutputStream append(Path p, int bufferSize, Progressable progress)`
Implementations without a compliant call SHOULD throw `UnsupportedOperationException`. Implementations without a compliant call SHOULD throw `UnsupportedOperationException`.
@ -634,6 +651,18 @@ Implementations without a compliant call SHOULD throw `UnsupportedOperationExcep
Return: `FSDataOutputStream`, which can update the entry `FS.Files[p]` Return: `FSDataOutputStream`, which can update the entry `FS.Files[p]`
by appending data to the existing list. by appending data to the existing list.
### `FSDataOutputStreamBuilder appendFile(Path p)`
Make a `FSDataOutputStreamBuilder` to specify the parameters to append to an
existing file.
#### Implementation Notes
`appendFile(p)` only returns a `FSDataOutputStreamBuilder`; it does not make any
change to the filesystem immediately. When `build()` is invoked on the `FSDataOutputStreamBuilder`,
the builder parameters are verified and [`append()`](#FileSystem.append) is
invoked on the underlying filesystem. `build()` has the same preconditions and
postconditions as [`append()`](#FileSystem.append).
### `FSDataInputStream open(Path f, int bufferSize)` ### `FSDataInputStream open(Path f, int bufferSize)`

View File

@ -0,0 +1,182 @@
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- ============================================================= -->
<!-- CLASS: FSDataOutputStreamBuilder -->
<!-- ============================================================= -->
# class `org.apache.hadoop.fs.FSDataOutputStreamBuilder`
<!-- MACRO{toc|fromDepth=1|toDepth=2} -->
Builder pattern for `FSDataOutputStream` and its subclasses. It is used to
create a new file or open an existing file on `FileSystem` for write.
## Invariants
The `FSDataOutputStreamBuilder` interface does not validate parameters
or modify the state of `FileSystem` until [`build()`](#Builder.build) is
invoked.
## Implementation-agnostic parameters.
### <a name="Builder.create"></a> `FSDataOutputStreamBuilder create()`
Specify `FSDataOutputStreamBuilder` to create a file on `FileSystem`, equivalent
to `CreateFlag#CREATE`.
### <a name="Builder.append"></a> `FSDataOutputStreamBuilder append()`
Specify `FSDataOutputStreamBuilder` to append to an existing file on
`FileSystem`, equivalent to `CreateFlag#APPEND`.
### <a name="Builder.overwrite"></a> `FSDataOutputStreamBuilder overwrite(boolean overwrite)`
Specify whether `FSDataOutputStreamBuilder` should overwrite an existing file. If
`overwrite==true`, an existing file is truncated, equivalent to
`CreateFlag#OVERWRITE`.
### <a name="Builder.permission"></a> `FSDataOutputStreamBuilder permission(FsPermission permission)`
Set permission for the file.
### <a name="Builder.bufferSize"></a> `FSDataOutputStreamBuilder bufferSize(int bufSize)`
Set the size of the buffer to be used.
### <a name="Builder.replication"></a> `FSDataOutputStreamBuilder replication(short replica)`
Set the replication factor.
### <a name="Builder.blockSize"></a> `FSDataOutputStreamBuilder blockSize(long size)`
Set block size in bytes.
### <a name="Builder.recursive"></a> `FSDataOutputStreamBuilder recursive()`
Create parent directories if they do not exist.
### <a name="Builder.progress"></a> `FSDataOutputStreamBuilder progress(Progressable prog)`
Set the facility for reporting progress.
### <a name="Builder.checksumOpt"></a> `FSDataOutputStreamBuilder checksumOpt(ChecksumOpt chksumOpt)`
Set checksum opt.
### Set optional or mandatory parameters
FSDataOutputStreamBuilder opt(String key, ...)
FSDataOutputStreamBuilder must(String key, ...)
Set optional or mandatory parameters on the builder. Using `opt()` or `must()`,
a client can specify FS-specific parameters without inspecting the concrete type
of `FileSystem`.
// Don't
if (fs instanceof FooFileSystem) {
FooFileSystem foofs = (FooFileSystem) fs;
out = foofs.createFile(path)
.optionA()
.optionB("value")
.cache()
.build()
} else if (fs instanceof BarFileSystem) {
...
}
// Do
out = fs.createFile(path)
.permission(perm)
.bufferSize(bufSize)
.opt("foofs:option.a", true)
.opt("foofs:option.b", "value")
.opt("barfs:cache", true)
.must("foofs:cache", true)
.must("barfs:cache-size", 256 * 1024 * 1024)
.build();
#### Implementation Notes
The concrete `FileSystem` and/or `FSDataOutputStreamBuilder` implementation
MUST verify that implementation-agnostic parameters (e.g., "syncable") or
implementation-specific parameters (e.g., "foofs:cache")
are supported. `FileSystem` will satisfy optional parameters (via `opt(key, ...)`)
on a best-effort basis. If the mandatory parameters (via `must(key, ...)`) cannot be satisfied
by the `FileSystem`, `IllegalArgumentException` should be thrown in `build()`.
The behavior when resolving conflicts between the parameters set by
builder methods (e.g., `bufferSize()`) and those set by `opt()`/`must()` is undefined.
## HDFS-specific parameters.
`HdfsDataOutputStreamBuilder extends FSDataOutputStreamBuilder` provides additional
HDFS-specific parameters to further customize file creation / append behavior.
### `FSDataOutputStreamBuilder favoredNodes(InetSocketAddress[] nodes)`
Set favored DataNodes for new blocks.
### `FSDataOutputStreamBuilder syncBlock()`
Force closed blocks to the disk device. See `CreateFlag#SYNC_BLOCK`.
### `FSDataOutputStreamBuilder lazyPersist()`
Create the block on transient storage if possible.
### `FSDataOutputStreamBuilder newBlock()`
Append data to a new block instead of the end of the last partial block.
### `FSDataOutputStreamBuilder noLocalWrite()`
Advise that a block replica NOT be written to the local DataNode.
### `FSDataOutputStreamBuilder ecPolicyName(String policyName)`
Enforce the file to be a striped file with erasure coding policy 'policyName',
no matter what its parent directory's replication or erasure coding policy is.
### `FSDataOutputStreamBuilder replicate()`
Enforce the file to be a replicated file, no matter what its parent directory's
replication or erasure coding policy is.
## Builder interface
### <a name="Builder.build"></a> `FSDataOutputStream build()`
Create a new file or append to an existing file on the underlying `FileSystem`,
and return an `FSDataOutputStream` for writing.
#### Preconditions
The following combinations of parameters are not supported:
if APPEND|OVERWRITE: raise HadoopIllegalArgumentException
if CREATE|APPEND|OVERWRITE: raise HadoopIllegalArgumentException
`FileSystem` may reject the request for other reasons and throw `IOException`,
see `FileSystem#create(path, ...)` and `FileSystem#append()`.
#### Postconditions
FS' where :
FS'.Files'[p] == []
ancestors(p) is-subset-of FS'.Directories'
result = FSDataOutputStream
The result is `FSDataOutputStream` to be used to write data to filesystem.

View File

@ -33,5 +33,6 @@ HDFS as these are commonly expected by Hadoop client applications.
1. [Model](model.html) 1. [Model](model.html)
1. [FileSystem class](filesystem.html) 1. [FileSystem class](filesystem.html)
1. [FSDataInputStream class](fsdatainputstream.html) 1. [FSDataInputStream class](fsdatainputstream.html)
1. [FSDataOutputStreamBuilder class](fsdataoutputstreambuilder.html)
2. [Testing with the Filesystem specification](testing.html) 2. [Testing with the Filesystem specification](testing.html)
2. [Extending the specification and its tests](extending.html) 2. [Extending the specification and its tests](extending.html)