From 8af8453589232695c01b362872c0bd83686b2184 Mon Sep 17 00:00:00 2001 From: Anu Engineer Date: Mon, 17 Sep 2018 10:46:28 -0700 Subject: [PATCH] HDDS-435. Enhance the existing ozone documentation. Contributed by Elek, Marton. --- dev-support/bin/ozone-dist-layout-stitching | 2 + hadoop-ozone/common/src/main/bin/ozone | 1 - .../docs/{config.toml => config.yaml} | 32 +- hadoop-ozone/docs/content/BucketCommands.md | 106 +++++ hadoop-ozone/docs/content/BuildingSources.md | 37 ++ hadoop-ozone/docs/content/CommandShell.md | 206 ++++------ hadoop-ozone/docs/content/Concepts.md | 101 +++++ hadoop-ozone/docs/content/Dozone.md | 107 +++++ hadoop-ozone/docs/content/Freon.md | 61 +++ hadoop-ozone/docs/content/GettingStarted.md | 369 ------------------ hadoop-ozone/docs/content/Hdds.md | 49 +++ hadoop-ozone/docs/content/JavaApi.md | 152 ++++++++ hadoop-ozone/docs/content/KeyCommands.md | 111 ++++++ hadoop-ozone/docs/content/Metrics.md | 170 -------- hadoop-ozone/docs/content/OzoneFS.md | 64 +++ hadoop-ozone/docs/content/OzoneManager.md | 61 +++ hadoop-ozone/docs/content/RealCluster.md | 71 ++++ hadoop-ozone/docs/content/Rest.md | 32 +- hadoop-ozone/docs/content/RunningViaDocker.md | 70 ++++ hadoop-ozone/docs/content/RunningWithHDFS.md | 61 +++ hadoop-ozone/docs/content/SCMCLI.md | 26 ++ hadoop-ozone/docs/content/Settings.md | 139 +++++++ hadoop-ozone/docs/content/VolumeCommands.md | 100 +++++ hadoop-ozone/docs/content/_index.md | 86 +--- hadoop-ozone/docs/pom.xml | 4 - .../ozonedoc/layouts/_default/single.html | 3 + .../ozonedoc/layouts/partials/header.html | 4 +- .../ozonedoc/layouts/partials/sidebar.html | 8 +- .../themes/ozonedoc/static/css/ozonedoc.css | 14 +- 29 files changed, 1464 insertions(+), 783 deletions(-) rename hadoop-ozone/docs/{config.toml => config.yaml} (62%) create mode 100644 hadoop-ozone/docs/content/BucketCommands.md create mode 100644 hadoop-ozone/docs/content/BuildingSources.md create mode 100644 hadoop-ozone/docs/content/Concepts.md create mode 100644 
hadoop-ozone/docs/content/Dozone.md create mode 100644 hadoop-ozone/docs/content/Freon.md delete mode 100644 hadoop-ozone/docs/content/GettingStarted.md create mode 100644 hadoop-ozone/docs/content/Hdds.md create mode 100644 hadoop-ozone/docs/content/JavaApi.md create mode 100644 hadoop-ozone/docs/content/KeyCommands.md delete mode 100644 hadoop-ozone/docs/content/Metrics.md create mode 100644 hadoop-ozone/docs/content/OzoneFS.md create mode 100644 hadoop-ozone/docs/content/OzoneManager.md create mode 100644 hadoop-ozone/docs/content/RealCluster.md create mode 100644 hadoop-ozone/docs/content/RunningViaDocker.md create mode 100644 hadoop-ozone/docs/content/RunningWithHDFS.md create mode 100644 hadoop-ozone/docs/content/SCMCLI.md create mode 100644 hadoop-ozone/docs/content/Settings.md create mode 100644 hadoop-ozone/docs/content/VolumeCommands.md diff --git a/dev-support/bin/ozone-dist-layout-stitching b/dev-support/bin/ozone-dist-layout-stitching index 128ce10d52..b4d94b300d 100755 --- a/dev-support/bin/ozone-dist-layout-stitching +++ b/dev-support/bin/ozone-dist-layout-stitching @@ -156,6 +156,8 @@ cp "${ROOT}/hadoop-ozone/objectstore-service/target/hadoop-ozone-objectstore-ser # Optional documentation, could be missing cp -r "${ROOT}/hadoop-ozone/docs/target/classes/webapps/docs" ./share/hadoop/ozone/webapps/ozoneManager/ cp -r "${ROOT}/hadoop-ozone/docs/target/classes/webapps/docs" ./share/hadoop/hdds/webapps/scm/ +cp -r "${ROOT}/hadoop-ozone/docs/target/classes/webapps/docs" ./ + rm sbin/*all.sh rm sbin/*all.cmd diff --git a/hadoop-ozone/common/src/main/bin/ozone b/hadoop-ozone/common/src/main/bin/ozone index 5ad6801fd5..6bf8b012f2 100755 --- a/hadoop-ozone/common/src/main/bin/ozone +++ b/hadoop-ozone/common/src/main/bin/ozone @@ -32,7 +32,6 @@ function hadoop_usage hadoop_add_option "--loglevel level" "set the log4j level for this command" hadoop_add_option "--workers" "turn on worker mode" - hadoop_add_subcommand "classpath" client "prints the class path 
needed to get the hadoop jar and the required libraries" hadoop_add_subcommand "datanode" daemon "run a HDDS datanode" hadoop_add_subcommand "envvars" client "display computed Hadoop environment variables" diff --git a/hadoop-ozone/docs/config.toml b/hadoop-ozone/docs/config.yaml similarity index 62% rename from hadoop-ozone/docs/config.toml rename to hadoop-ozone/docs/config.yaml index eed74a96bc..e86b59970c 100644 --- a/hadoop-ozone/docs/config.toml +++ b/hadoop-ozone/docs/config.yaml @@ -13,11 +13,29 @@ # See the License for the specific language governing permissions and # limitations under the License. -baseURL = "/" -languageCode = "en-us" -DefaultContentLanguage = "en" -title = "Ozone" -theme = "ozonedoc" -pygmentsCodeFences = true -pygmentsStyle = "monokailight" +languageCode: "en-us" +DefaultContentLanguage: "en" +title: "Ozone" +theme: "ozonedoc" +pygmentsCodeFences: true +uglyurls: true +relativeURLs: true + +menu: + main: + - identifier: Starting + name: "Getting Started" + title: "Getting Started" + url: runningviadocker.html + weight: 1 + - identifier: Client + name: Client + title: Client + url: commandshell.html + weight: 2 + - identifier: Tools + name: Tools + title: Tools + url: dozone.html + weight: 3 diff --git a/hadoop-ozone/docs/content/BucketCommands.md b/hadoop-ozone/docs/content/BucketCommands.md new file mode 100644 index 0000000000..dad11e34b8 --- /dev/null +++ b/hadoop-ozone/docs/content/BucketCommands.md @@ -0,0 +1,106 @@ +--- +title: Bucket Commands +menu: + main: + parent: Client + weight: 3 +--- + +Ozone shell supports the following bucket commands. + + * [create](#create) + * [delete](#delete) + * [info](#info) + * [list](#list) + * [update](#update) + +### Create + +The bucket create command allows a user to create a bucket. + +***Params:*** + +| Arguments | Comment | +|--------------------------------|-----------------------------------------| +| Uri | The name of the bucket in **/volume/bucket** format. 
+ +{{< highlight bash >}} +ozone sh bucket create /hive/jan +{{< /highlight >}} + +The above command will create a bucket called _jan_ in the _hive_ volume. +Since no scheme was specified this command defaults to O3 (RPC) protocol. + +### Delete + +The bucket delete command allows a user to delete a bucket. If the +bucket is not empty then this command will fail. + +***Params:*** + +| Arguments | Comment | +|--------------------------------|-----------------------------------------| +| Uri | The name of the bucket + +{{< highlight bash >}} +ozone sh bucket delete /hive/jan +{{< /highlight >}} + +The above command will delete _jan_ bucket if it is empty. + +### Info + +The bucket info command returns the information about the bucket. +***Params:*** + +| Arguments | Comment | +|--------------------------------|-----------------------------------------| +| Uri | The name of the bucket. + +{{< highlight bash >}} +ozone sh bucket info /hive/jan +{{< /highlight >}} + +The above command will print out the information about _jan_ bucket. + +### List + +The bucket list command allows a user to list the buckets in a volume. + +***Params:*** + +| Arguments | Comment | +|--------------------------------|-----------------------------------------| +| -l, --length | Maximum number of results to return. Default: 100 +| -p, --prefix | Optional, Only buckets that match this prefix will be returned. +| -s, --start | The listing will start from key after the start key. +| Uri | The name of the _volume_. + +{{< highlight bash >}} +ozone sh bucket list /hive +{{< /highlight >}} + +This command will list all buckets on the volume _hive_. + + + +### Update + +The bucket update command allows changing access permissions on bucket. + +***Params:*** + +| Arguments | Comment | +|--------------------------------|-----------------------------------------| +| --addAcl | Optional, Comma separated ACLs that will be added to bucket. +| --removeAcl | Optional, Comma separated list of acl to remove.
+| Uri | The name of the bucket. + +{{< highlight bash >}} +ozone sh bucket update --addAcl=user:bilbo:rw /hive/jan +{{< /highlight >}} + +The above command gives user bilbo read/write permission to the bucket. + +You can try out these commands from the docker instance of the [Alpha +Cluster](runningviadocker.html). diff --git a/hadoop-ozone/docs/content/BuildingSources.md b/hadoop-ozone/docs/content/BuildingSources.md new file mode 100644 index 0000000000..2cad55b83c --- /dev/null +++ b/hadoop-ozone/docs/content/BuildingSources.md @@ -0,0 +1,37 @@ +--- +title: Building from Sources +weight: 1 +menu: + main: + parent: Starting + weight: 5 +--- +***This is a guide on how to build the ozone sources. If you are not +planning to build sources yourself, you can safely skip this page.*** + +If you are a Hadoop ninja, and wise in the ways of Apache, you already know +that a real Apache release is a source release. + +If you want to build from sources, Please untar the source tarball and run +the ozone build command. This instruction assumes that you have all the +dependencies to build Hadoop on your build machine. If you need instructions +on how to build Hadoop, please look at the Apache Hadoop Website. + +{{< highlight bash >}} +mvn clean package -DskipTests=true -Dmaven.javadoc.skip=true -Phdds -Pdist -Dtar -DskipShade +{{< /highlight >}} + + +This will build an ozone-\.tar.gz in your target directory. + +You can copy this tarball and use this instead of binary artifacts that are +provided along with the official release. + +## How to test the build +You can run the acceptance tests in the hadoop-ozone directory to make sure +that your build is functional. To launch the acceptance tests, please follow + the instructions in the **README.md** in the + ```$hadoop_src/hadoop-ozone/acceptance-test``` directory. Acceptance tests + will start a small ozone cluster and verify that ozone shell and ozone file + system is fully functional. 
diff --git a/hadoop-ozone/docs/content/CommandShell.md b/hadoop-ozone/docs/content/CommandShell.md index 45c2c65764..bfb2d395d5 100644 --- a/hadoop-ozone/docs/content/CommandShell.md +++ b/hadoop-ozone/docs/content/CommandShell.md @@ -1,6 +1,9 @@ --- -title: Command Shell -menu: main +title: Ozone CLI +menu: + main: + parent: Client + weight: 1 --- -# Ozone Command Shell + +Ozone has a set of command line tools that can be used to manage ozone. + +All these commands are invoked via the ```ozone``` script. + +The commands supported by ozone are: + + * **classpath** - Prints the class path needed to get the hadoop jar and the + required libraries. + * **fs** - Runs a command on ozone file system. + * **datanode** - Via daemon command, the HDDS data nodes can be started or + stopped. + * **envvars** - Display computed Hadoop environment variables. + * **freon** - Runs the ozone load generator. + * **genesis** - Developer Only, Ozone micro-benchmark application. + * **getozoneconf** - Reads ozone config values from configuration. + * **jmxget** - Get JMX exported values from NameNode or DataNode. + * **om** - Ozone Manager, via daemon command can be started or stopped. + * **sh** - Primary command line interface for ozone. + * **scm** - Storage Container Manager service, via daemon can be + stated or stopped. + * **scmcli** - Developer only, Command Line Interface for the Storage + Container Manager. + * **version** - Prints the version of Ozone and HDDS. + * **genconf** - Generate minimally required ozone configs and output to + ozone-site.xml. + +## Understanding Ozone command shell +The most used command when working with Ozone is the Ozone command shell. +Ozone command shell gives a command shell interface to work against +Ozone. + +The Ozone shell commands take the following format. + +> _ozone sh object action url_ + +**ozone** script is used to invoke all Ozone sub-commands. The ozone shell is +invoked via ```sh``` command. 
+ +The object can be a volume, bucket or a key. The action is various verbs like + create, list, delete etc. -Ozone command shell gives a command shell interface to work against Ozone. -Please note that this document assumes that cluster is deployed -with simple authentication. +Ozone URL can point to a volume, bucket or keys in the following format: -The Ozone commands take the following format. -``` -ozone sh --command_ /volume/bucket/key -user [-root] -``` - -The `port` specified in command should match the port mentioned in the config -property `hdds.rest.http-address`. This property can be set in `ozone-site.xml`. -The default value for the port is `9880` and is used in below commands. - -The `-root` option is a command line short cut that allows *ozone fs* -commands to be run as the user that started the cluster. This is useful to -indicate that you want the commands to be run as some admin user. The only -reason for this option is that it makes the life of a lazy developer more -easier. - -## Volume Commands +_\[scheme\]\[server:port\]/volume/bucket/key_ -The volume commands allow users to create, delete and list the volumes in the -ozone cluster. +Where, -### Create Volume -Volumes can be created only by administrators. Here is an example of creating a volume. -``` -ozone sh -createVolume hive -user bilbo -quota 100TB -root -``` -The above command creates a volume called `hive` owned by user `bilbo`. The -`-root` option allows the command to be executed as user `hdfs` which is an -admin in the cluster. +1. Scheme - Can be one of the following + * o3 - Ozone's native RPC protocol. If you specify this scheme, the + native RPC protocol is used while communicating with Ozone Manager and + data nodes. + * http/https - If an HTTP protocol is specified, then Ozone shell assumes + that you are interested in using the Ozone Rest protocol and falls back + to using the REST protocol instead of RPC. 
+ If no protocol is specified, the Ozone shell defaults to the native RPC + protocol. -### Update Volume -Updates information like ownership and quota on an existing volume. -``` -ozone sh -updateVolume hive -quota 500TB -root -``` +2. Server:Port - This is the address of the Ozone Manager. This can be server + only, in that case, the default port is used. If this value is omitted +then the defaults specified in the ozone-site.xml will be used for Ozone +Manager address. -The above command changes the volume quota of hive from 100TB to 500TB. +Depending on the call, the volume/bucket/key names will be part of the URL. +Please see volume commands, bucket commands, and key commands section for more +detail. -### Delete Volume -Deletes a Volume if it is empty. -``` -ozone sh -deleteVolume /hive -root -``` +## Invoking help -### Info Volume -Info volume command allows the owner or the administrator of the cluster -to read meta-data about a specific volume. -``` -ozone sh -infoVolume /hive -root -``` +Ozone shell help can be invoked at _object_ level or at _action_ level. +For example: -### List Volumes -List volume command can be used by administrator to list volumes of any -user. It can also be used by any user to list their own volumes. -``` -ozone sh -listVolume / -user bilbo -``` +{{< highlight bash >}} +ozone sh volume --help +{{< /highlight >}} -The above command lists all volumes owned by user bilbo. +This will show all possible actions for volumes. -## Bucket Commands - -Bucket commands follow a similar pattern as volume commands. However bucket -commands are designed to be run by the owner of the volume. -Following examples assume that these commands are run by the owner of the -volume or bucket. - -### Create Bucket -Create bucket call allows the owner of a volume to create a bucket. -``` -ozone sh -createBucket /hive/january -``` - -This call creates a bucket called `january` in the volume called `hive`. 
If -the volume does not exist, then this call will fail. - -### Update Bucket -Updates bucket meta-data, like ACLs. -``` -ozone sh -updateBucket /hive/january -addAcl user:spark:rw -``` -### Delete Bucket -Deletes a bucket if it is empty. -``` -ozone sh -deleteBucket /hive/january -``` - -### Info Bucket -Returns information about a given bucket. -``` -ozone sh -infoBucket /hive/january -``` - -### List Buckets -List buckets in a given volume. -``` -ozone sh -listBucket /hive -``` - -## Ozone Key Commands - -Ozone key commands allows users to put, delete and get keys from Ozone buckets. - -### Put Key -Creates or overwrites a key in Ozone store, -file points to the file you want -to upload. -``` -ozone sh -putKey /hive/january/processed.orc -file processed.orc -``` - -### Get Key -Downloads a file from the Ozone bucket. -``` -ozone sh -getKey /hive/january/processed.orc -file processed.orc.copy -``` - -### Delete Key -Deletes a key from the Ozone store. -``` -ozone sh -deleteKey /hive/january/processed.orc -``` - -### Info Key -Reads key metadata from the Ozone store. -``` -ozone sh -infoKey /hive/january/processed.orc -``` - -### List Keys -List all keys in an Ozone bucket. -``` -ozone sh -listKey /hive/january -``` +or it can be invoked to explain a specific action like +{{< highlight bash >}} +ozone sh volume create --help +{{< /highlight >}} +This command will give you command line options of the create command. diff --git a/hadoop-ozone/docs/content/Concepts.md b/hadoop-ozone/docs/content/Concepts.md new file mode 100644 index 0000000000..125250b9eb --- /dev/null +++ b/hadoop-ozone/docs/content/Concepts.md @@ -0,0 +1,101 @@ +--- +title: Architecture +date: "2017-10-10" +menu: main +--- + + + +Ozone is a redundant, distributed object store build by +leveraging primitives present in HDFS. The primary design point of ozone is scalability, and it aims to scale to billions of objects. + +Ozone consists of volumes, buckets, and keys. 
A volume is similar to a home directory in the ozone world. Only an administrator can create it. Volumes are used to store buckets. Once a volume is created users can create as many buckets as needed. Ozone stores data as keys which live inside these buckets. + +Ozone namespace is composed of many storage volumes. Storage volumes are also used as the basis for storage accounting. + +To access a key, an Ozone URL has the following format: + +``` +http://servername:port/volume/bucket/key +``` + +Where the server name is the name of a data node, the port is the data node HTTP port. The volume represents the name of the ozone volume; bucket is an ozone bucket created by the user and key represents the file. + +Please look at the [command line interface]({{< ref "CommandShell.md#shell" >}}) for more info. + +Ozone supports both REST and RPC protocols. Clients can choose either of these protocols to communicate with Ozone. Please see the [client documentation]({{< ref "JavaApi.md" >}}) for more details. + +Ozone separates namespace management and block space management; this helps +ozone to scale much better. The namespace is managed by a daemon called +[Ozone Manager]({{< ref "OzoneManager.md" >}}) (OM), and block space is +managed by [Storage Container Manager]({{< ref "Hdds.md" >}}) (SCM). + +The data nodes provide replication and ability to store blocks; these blocks are stored in groups to reduce the metadata pressure on SCM. These groups of blocks are called storage containers. Hence the block manager is called storage container +manager. + +Ozone Overview +-------------- + +
The following diagram is a high-level overview of the core components of Ozone.

 + +![Architecture diagram](../../OzoneOverview.svg) + +The main elements of Ozone are
: + +### Ozone Manager
 + +[Ozone Manager]({{< ref "OzoneManager.md" >}}) (OM) takes care of the Ozone's namespace. +All ozone objects like volumes, buckets, and keys are managed by OM. In Short, OM is the metadata manager for Ozone. +OM talks to blockManager(SCM) to get blocks and passes it on to the Ozone +client. Ozone client writes data to these blocks. +OM will eventually be replicated via Apache Ratis for High Availability.
 + +### Storage Container Manager + +[Storage Container Manager]({{< ref "Hdds.md" >}}) (SCM) is the block and cluster manager for Ozone. +SCM along with data nodes offer a service called 'storage containers'. +A storage container is a group of unrelated blocks that are managed together as a single entity. + +SCM offers the following abstractions.

 + +![SCM Abstractions](../../SCMBlockDiagram.png) + +### Blocks +Blocks are similar to blocks in HDFS. They are replicated store of data. Client writes data to blocks. + +### Containers +A collection of blocks replicated and managed together. + +### Pipelines +SCM allows each storage container to choose its method of replication. +For example, a storage container might decide that it needs only one copy of a block +and might choose a stand-alone pipeline. Another storage container might want to have a very high level of reliability and pick a RATIS based pipeline. In other words, SCM allows different kinds of replication strategies to co-exist. The client while writing data, chooses a storage container with required properties. + +### Pools +A group of data nodes is called a pool. For scaling purposes, +we define a pool as a set of machines. This makes management of data nodes easier. + +### Nodes +The data node where data is stored. SCM monitors these nodes via heartbeat. + +### Clients +Ozone ships with a set of clients. Ozone [CLI]({{< ref "CommandShell.md#shell" >}}) is the command line interface like 'hdfs' command.
 [Freon]({{< ref "Freon.md" >}}) is a load generation tool for Ozone.
 + +### REST Handler +Ozone provides an RPC (Remote Procedure Call) as well as a REST (Representational State Transfer) interface. This allows clients to be written in many languages quickly. Ozone strives to maintain an API compatibility between REST and RPC. +For most purposes, a client can make one line change to switch from REST to RPC or vice versa. 
 + +### Ozone File System +Ozone file system (TODO: Add documentation) is a Hadoop compatible file system. This allows Hadoop services and applications like Hive and Spark to run against +Ozone without any change. + +### Ozone Client +This is similar to DFSClient in HDFS. This is the standard client to talk to Ozone. All other components that we have discussed so far rely on Ozone client. Ozone client supports both RPC and REST protocols. diff --git a/hadoop-ozone/docs/content/Dozone.md b/hadoop-ozone/docs/content/Dozone.md new file mode 100644 index 0000000000..b4e9129880 --- /dev/null +++ b/hadoop-ozone/docs/content/Dozone.md @@ -0,0 +1,107 @@ +--- +title: "Dozone & Dev Tools" +date: 2017-08-10 +menu: + main: + parent: Tools +--- + + + +Dozone stands for docker for ozone. Ozone supports docker to make it easy to develop and test ozone. Starting a docker based ozone container is simple. + +In the `compose/ozone` directory there are two files that define the docker and ozone settings. + +Developers can + +{{< highlight bash >}} +cd compose/ozone +{{< /highlight >}} + +and simply run + +{{< highlight bash >}} +docker-compose up -d +{{< /highlight >}} + +to run a ozone cluster on docker. + +This command will launch a Namenode, OM, SCM and a data node. + +To access the OM UI, one can run 'http://localhost:9874'. + +_Please note_: dozone does not map the data node ports to the 9864. Instead, it maps to the ephemeral port range. So many examples in the command shell will not work if you run those commands from the host machine. To find out where the data node port is listening, you can run the `docker ps` command or always ssh into a container before running ozone commands. + +To shutdown a running docker based ozone cluster, please run + +{{< highlight bash >}} +docker-compose down +{{< /highlight >}} + + +Adding more config settings +--------------------------- +The file called `docker-config` contains all ozone specific config settings. 
This file is processed to create the ozone-site.xml. + +Useful Docker & Ozone Commands +------------------------------ + +If you make any modifications to ozone, the simplest way to test it is to run freon and unit tests. + +Here are the instructions to run freon in a docker based cluster. + +{{< highlight bash >}} +docker-compose exec datanode bash +{{< /highlight >}} + +This will open a bash shell on the data node container. +Now we can execute freon for load generation. + +{{< highlight bash >}} +ozone freon randomkeys --numOfVolumes=10 --numOfBuckets 10 --numOfKeys 10 +{{< /highlight >}} + +Here is a set of helpful commands while working with docker for ozone. +To check the status of the components: + +{{< highlight bash >}} +docker-compose ps +{{< /highlight >}} + +To get logs from a specific node/service: + +{{< highlight bash >}} +docker-compose logs scm +{{< /highlight >}} + + +As the WebUI ports are forwarded to the external machine, you can check the web UI: + +* For the Storage Container Manager: http://localhost:9876 +* For the Ozone Manager: http://localhost:9874 +* For the Datanode: check the port with docker ps (as there could be multiple data nodes, their ports are mapped to the ephemeral port range) +* For the Namenode: http://localhost:9870 + +You can start multiple data nodes with: + +{{< highlight bash >}} +docker-compose scale datanode=3 +{{< /highlight >}} + +You can test the commands from the [Ozone CLI]({{< ref "CommandShell.md#shell" >}}) after opening a new bash shell in one of the containers: + +{{< highlight bash >}} +docker-compose exec datanode bash +{{< /highlight >}} diff --git a/hadoop-ozone/docs/content/Freon.md b/hadoop-ozone/docs/content/Freon.md new file mode 100644 index 0000000000..1563e731f4 --- /dev/null +++ b/hadoop-ozone/docs/content/Freon.md @@ -0,0 +1,61 @@ +--- +title: Freon +date: "2017-09-02T23:58:17-07:00" +menu: + main: + parent: Tools +--- + + +Overview +-------- + +Freon is a load-generator for Ozone.
This tool is used for testing the functionality of ozone. + +### Random keys + +In randomkeys mode, the data written into ozone cluster is randomly generated. +Each key will be of size 10 KB. + +The number of volumes/buckets/keys can be configured. The replication type and +factor (eg. replicate with ratis to 3 nodes) Also can be configured. + +For more information use + +`bin/ozone freon --help` + +### Example + +{{< highlight bash >}} +ozone freon randomkeys --numOfVolumes=10 --numOfBuckets 10 --numOfKeys 10 --replicationType=RATIS --factor=THREE +{{< /highlight >}} + +{{< highlight bash >}} +*************************************************** +Status: Success +Git Base Revision: 48aae081e5afacbb3240657556b26c29e61830c3 +Number of Volumes created: 10 +Number of Buckets created: 100 +Number of Keys added: 1000 +Ratis replication factor: THREE +Ratis replication type: RATIS +Average Time spent in volume creation: 00:00:00,035 +Average Time spent in bucket creation: 00:00:00,319 +Average Time spent in key creation: 00:00:03,659 +Average Time spent in key write: 00:00:10,894 +Total bytes written: 10240000 +Total Execution time: 00:00:16,898 +*********************** +{{< /highlight >}} diff --git a/hadoop-ozone/docs/content/GettingStarted.md b/hadoop-ozone/docs/content/GettingStarted.md deleted file mode 100644 index 9b9d49fd15..0000000000 --- a/hadoop-ozone/docs/content/GettingStarted.md +++ /dev/null @@ -1,369 +0,0 @@ ---- -title: Getting started -weight: -2 -menu: main ---- - - -# Ozone - Object store for Apache Hadoop - - -## Introduction - -Ozone is a scalable distributed object store for Hadoop. Ozone supports RPC -and REST APIs for working with Volumes, Buckets and Keys. - -Existing Hadoop applications can use Ozone transparently via a Hadoop Compatible -FileSystem shim. - -### Basic terminology -1. **Volumes** - Volumes are a notion similar to accounts. Volumes can be -created or deleted only by administrators. -1. 
**Buckets** - A volume can contain zero or more buckets. -1. **Keys** - Keys are unique within a given bucket. - -### Services in a minimal Ozone cluster -1. **Ozone Manager (OM)** - stores Ozone Metadata namely Volumes, -Buckets and Key names. -1. **Storage Container Manager (SCM)** - handles Storage Container lifecycle. -Containers are the unit of replication in Ozone and not exposed to users. -1. **DataNodes** - These are HDFS DataNodes which understand how to store -Ozone Containers. Ozone has been designed to efficiently share storage space -with HDFS blocks. - -## Getting Started - -Ozone is currently work-in-progress and lives in the Hadoop source tree. -The sub-projects (`hadoop-ozone` and `hadoop-hdds`) are part of -the Hadoop source tree but they are not compiled by default and not -part of official Apache Hadoop releases. - -To use Ozone, you have to build a package by yourself and deploy a cluster. - -### Building Ozone - -To build Ozone, please checkout the Hadoop sources from the -[Apache Hadoop git repo](https://git-wip-us.apache.org/repos/asf?p=hadoop.git). -Then checkout the `trunk` branch and build it with the `hdds` profile enabled. - -` -git checkout trunk -mvn clean package -DskipTests=true -Dmaven.javadoc.skip=true -Pdist -Phdds -Dtar -DskipShade -` - -`skipShade` is just to make compilation faster and not required. - -This builds a tarball in your distribution directory which can be used to deploy your -Ozone cluster. The tarball path is `hadoop-dist/target/ozone-${project.version}.tar.gz`. - -At this point you can either setup a physical cluster or run Ozone via -docker. - -### Running Ozone via Docker - -This is the quickest way to bring up an Ozone cluster for development/testing -or if you just want to get a feel for Ozone. It assumes that you have docker installed -on the machine. - -Go to the directory where the docker compose files exist and tell -`docker-compose` to start Ozone. 
This will start SCM, OM and a single datanode -in the background. - -``` -cd hadoop-dist/target/ozone-*/compose/ozone - -docker-compose up -d -``` - -Now let us run some workload against Ozone. To do that we will run -_freon_, the Ozone load generator after logging into one of the docker -containers for OM, SCM or DataNode. Let's take DataNode for example:. -``` -docker-compose exec datanode bash - -ozone freon -mode offline -validateWrites -numOfVolumes 1 -numOfBuckets 10 -numOfKeys 100 -``` - -You can checkout the OM UI to see the requests information. -``` -http://localhost:9874/ -``` - -If you need more datanodes you can scale up: -``` -docker-compose up --scale datanode=3 -d -``` - -## Running Ozone using a real cluster - -### Configuration - -First initialize Hadoop cluster configuration files like hadoop-env.sh, -core-site.xml, hdfs-site.xml and any other configuration files that are -needed for your cluster. - -#### Update hdfs-site.xml - -The container manager part of Ozone runs inside DataNodes as a pluggable module. -To activate ozone you should define the service plugin implementation class. -**Important**: It should be added to the **hdfs-site.xml** as the plugin should -be activated as part of the normal HDFS Datanode bootstrap. -``` - - dfs.datanode.plugins - org.apache.hadoop.ozone.HddsDatanodeService - -``` - - -#### Create/Generate ozone-site.xml - -Ozone relies on its own configuration file called `ozone-site.xml`. - -The following command will generate a template ozone-site.xml at the specified -path -``` -ozone genconf -output -``` - -The following are the most important settings. - - 1. _*ozone.enabled*_ This is the most important setting for ozone. - Currently, Ozone is an opt-in subsystem of HDFS. By default, Ozone is - disabled. Setting this flag to `true` enables ozone in the HDFS cluster. - Here is an example, - ``` - - ozone.enabled - True - - ``` - 1. **ozone.metadata.dirs** Administrators can specify where the - metadata must reside. 
Usually you pick your fastest disk (SSD if - you have them on your nodes). OM, SCM and datanode will write the metadata - to these disks. This is a required setting, if this is missing Ozone will - fail to come up. Here is an example, - ``` - - ozone.metadata.dirs - /data/disk1/meta - - ``` - -1. **ozone.scm.names** Ozone is build on top of container framework. Storage - container manager(SCM) is a distributed block service which is used by ozone - and other storage services. - This property allows datanodes to discover where SCM is, so that - datanodes can send heartbeat to SCM. SCM is designed to be highly available - and datanodes assume there are multiple instances of SCM which form a highly - available ring. The HA feature of SCM is a work in progress. So we - configure ozone.scm.names to be a single machine. Here is an example, - ``` - - ozone.scm.names - scm.hadoop.apache.org - - ``` - -1. **ozone.scm.datanode.id** Each datanode that speaks to SCM generates an ID -just like HDFS. This is a mandatory setting. Please note: -This path will be created by datanodes if it doesn't exist already. Here is an - example, - ``` - - ozone.scm.datanode.id - /data/disk1/scm/meta/node/datanode.id - - ``` - -1. **ozone.scm.block.client.address** Storage Container Manager(SCM) offers a - set of services that can be used to build a distributed storage system. One - of the services offered is the block services. OM and HDFS would use this - service. This property describes where OM can discover SCM's block service - endpoint. There is corresponding ports etc, but assuming that we are using - default ports, the server address is the only required field. Here is an - example, - ``` - - ozone.scm.block.client.address - scm.hadoop.apache.org - - ``` - -1. **ozone.om.address** OM server address. This is used by OzoneClient and -Ozone File System. 
- ``` - - ozone.om.address - om.hadoop.apache.org - - ``` - -#### Ozone Settings Summary - -| Setting | Value | Comment | -|--------------------------------|------------------------------|------------------------------------------------------------------| -| ozone.enabled | True | This enables SCM and containers in HDFS cluster. | -| ozone.metadata.dirs | file path | The metadata will be stored here. | -| ozone.scm.names | SCM server name | Hostname:port or or IP:port address of SCM. | -| ozone.scm.block.client.address | SCM server name and port | Used by services like OM | -| ozone.scm.client.address | SCM server name and port | Used by client side | -| ozone.scm.datanode.address | SCM server name and port | Used by datanode to talk to SCM | -| ozone.om.address | OM server name | Used by Ozone handler and Ozone file system. | - - -#### Sample ozone-site.xml - -``` - - - - - ozone.enabled - True - - - - ozone.metadata.dirs - /data/disk1/ozone/meta - - - - ozone.scm.names - 127.0.0.1 - - - - ozone.scm.client.address - 127.0.0.1:9860 - - - - ozone.scm.block.client.address - 127.0.0.1:9863 - - - - ozone.scm.datanode.address - 127.0.0.1:9861 - - - - ozone.om.address - 127.0.0.1:9874 - - -``` - - - -### Starting Ozone - -Ozone is designed to run concurrently with HDFS. The simplest way to [start -HDFS](../hadoop-common/ClusterSetup.html) is to run `start-dfs.sh` from the -`$HADOOP/sbin/start-dfs.sh`. Once HDFS -is running, please verify it is fully functional by running some commands like - - - *./hdfs dfs -mkdir /usr* - - *./hdfs dfs -ls /* - - Once you are sure that HDFS is running, start Ozone. To start ozone, you - need to start SCM and OM. - -The first time you bring up Ozone, SCM must be initialized. -``` -ozone scm -init -``` - -Start SCM. -``` -ozone --daemon start scm -``` - -Once SCM gets started, OM must be initialized. -``` -ozone om -createObjectStore -``` - -Start OM. 
-``` -ozone --daemon start om -``` - -If you would like to start HDFS and Ozone together, you can do that by running - a single command. -``` -$HADOOP/sbin/start-ozone.sh -``` - -This command will start HDFS and then start the ozone components. - -Once you have ozone running you can use these ozone [shell](./OzoneCommandShell.html) -commands to start creating a volume, bucket and keys. - -## Diagnosing issues - -Ozone tries not to pollute the existing HDFS streams of configuration and -logging. So ozone logs are by default configured to be written to a file -called `ozone.log`. This is controlled by the settings in `log4j.properties` -file in the hadoop configuration directory. - -Here is the log4j properties that are added by ozone. - - -``` - # - # Add a logger for ozone that is separate from the Datanode. - # - #log4j.debug=true - log4j.logger.org.apache.hadoop.ozone=DEBUG,OZONE,FILE - - # Do not log into datanode logs. Remove this line to have single log. - log4j.additivity.org.apache.hadoop.ozone=false - - # For development purposes, log both to console and log file. - log4j.appender.OZONE=org.apache.log4j.ConsoleAppender - log4j.appender.OZONE.Threshold=info - log4j.appender.OZONE.layout=org.apache.log4j.PatternLayout - log4j.appender.OZONE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p \ - %X{component} %X{function} %X{resource} %X{user} %X{request} - %m%n - - # Real ozone logger that writes to ozone.log - log4j.appender.FILE=org.apache.log4j.DailyRollingFileAppender - log4j.appender.FILE.File=${hadoop.log.dir}/ozone.log - log4j.appender.FILE.Threshold=debug - log4j.appender.FILE.layout=org.apache.log4j.PatternLayout - log4j.appender.FILE.layout.ConversionPattern=%d{ISO8601} [%t] %-5p \ - (%F:%L) %X{function} %X{resource} %X{user} %X{request} - \ - %m%n -``` - -If you would like to have a single datanode log instead of ozone stuff -getting written to ozone.log, please remove this line or set this to true. 
-``` -log4j.additivity.org.apache.hadoop.ozone=false -``` - -On the SCM/OM side, you will be able to see -1. `hadoop-hdfs-om-hostname.log` -1. `hadoop-hdfs-scm-hostname.log` - -## Reporting Bugs -Please file any issues you see under [Apache HDDS Project Jira](https://issues.apache.org/jira/projects/HDDS/issues/). - -## References - - [Object store in HDFS: HDFS-7240](https://issues.apache.org/jira/browse/HDFS-7240) - - [Ozone File System: HDFS-13074](https://issues.apache.org/jira/browse/HDFS-13074) - - [Building HDFS on top of new storage layer (HDDS): HDFS-10419](https://issues.apache.org/jira/browse/HDFS-10419) diff --git a/hadoop-ozone/docs/content/Hdds.md b/hadoop-ozone/docs/content/Hdds.md new file mode 100644 index 0000000000..9e7589df34 --- /dev/null +++ b/hadoop-ozone/docs/content/Hdds.md @@ -0,0 +1,49 @@ +--- +title: "Hadoop Distributed Data Store" +date: "2017-09-14" +menu: + main: + parent: Architecture +weight: 10 +--- + +SCM Overview +------------ + +Storage Container Manager or SCM is a very important component of ozone. SCM +offers block and container-based services to Ozone Manager. A container is a +collection of unrelated blocks under ozone. SCM and data nodes work together +to maintain the replication levels needed by the cluster. + +It is easier to look at a putKey operation to understand the role that SCM plays. + +To put a key, a client makes a call to OM with the following arguments. + +-- putKey(keyName, data, pipeline type, replication count) + +1. keyName - refers to the file name. +2. data - The data that the client wants to write. +3. pipeline type - Allows the client to select the pipeline type. A pipeline + refers to the replication strategy used for replicating a block. Ozone + currently supports Stand Alone and Ratis as two different pipeline types. +4. replication count - This specifies how many copies of the block replica should be maintained.
+ +In most cases, the client does not specify the pipeline type and replication + count. The default pipeline type and replication count are used. + + +Ozone Manager when it receives the putKey call, makes a call to SCM asking +for a pipeline instance with the specified property. So if the client asked +for RATIS replication strategy and a replication count of three, then OM +requests SCM to return a set of data nodes that meet this capability. + +If SCM can find a pipeline (that is, a set of data nodes) that can meet +the requirement from the client, then those nodes are returned to OM. OM will +persist this info and return a tuple consisting of {BlockID, ContainerName, and Pipeline}. + +If SCM is not able to find a pipeline, then SCM creates a logical pipeline and then returns it. + + +SCM manages blocks, containers, and pipelines. To return healthy pipelines, +SCM also needs to understand the node health. So SCM listens to heartbeats +from data nodes and acts as the node manager too. diff --git a/hadoop-ozone/docs/content/JavaApi.md b/hadoop-ozone/docs/content/JavaApi.md new file mode 100644 index 0000000000..a46337745d --- /dev/null +++ b/hadoop-ozone/docs/content/JavaApi.md @@ -0,0 +1,152 @@ +--- +title: "Java API" +date: "2017-09-14" +menu: + main: + parent: "Client" +--- + +Introduction +------------- + +Ozone ships with its own client library that supports both RPC (Remote +Procedure Call) and REST (Representational State Transfer). This library is +the primary user interface to ozone. + +It is trivial to switch from RPC to REST or vice versa, by setting the +property _ozone.client.protocol_ in the configuration or by calling the +appropriate factory method. + +## Creating an Ozone client +The Ozone client factory creates the ozone client. It allows the user to +specify the protocol of communication.
For example, to get a REST client, we +can use + +{{< highlight java >}} +OzoneClient ozClient = OzoneClientFactory.getRestClient(); +{{< /highlight >}} + +And to get an RPC client we can call + +{{< highlight java >}} +OzoneClient ozClient = OzoneClientFactory.getRpcClient(); +{{< /highlight >}} + +If the user wants to create a client based on the configuration, then they can +call + +{{< highlight java >}} +OzoneClient ozClient = OzoneClientFactory.getClient(); +{{< /highlight >}} + +and an appropriate client based on configuration will be returned. + +## Writing data using Ozone Client + +The hierarchy of data inside ozone is a volume, bucket and a key. A volume +is a collection of buckets. A bucket is a collection of keys. To write data +to ozone, you need a volume, bucket and a key. + +### Creating a Volume + +Once we have a client, we need to get a reference to the ObjectStore. This +is done via + +{{< highlight java >}} +ObjectStore objectStore = ozClient.getObjectStore(); +{{< /highlight >}} + +An object store represents an active cluster against which the client is working. + +{{< highlight java >}} +// Let us create a volume to store our game assets. +// This uses default arguments for creating that volume. +objectStore.createVolume("assets"); + +// Let us verify that the volume got created. +OzoneVolume assets = objectStore.getVolume("assets"); +{{< /highlight >}} + + +It is possible to pass an array of arguments to the createVolume by creating volume arguments. + +### Creating a Bucket + +Once you have a volume, you can create buckets inside the volume. + +{{< highlight java >}} +// Let us create a bucket called videos. +assets.createBucket("videos"); +OzoneBucket video = assets.getBucket("videos"); +{{< /highlight >}} + +At this point we have a usable volume and a bucket. Our volume is called assets and bucket is called videos. + +Now we can create a Key.
+ +### Reading and Writing a Key + +With a bucket object the users can now read and write keys. The following code reads a video called intro.mp4 from the local disk and stores it in the video bucket that we just created. + +{{< highlight java >}} +// read data from the file, this is a user provided function. +byte [] videoData = readFile("intro.mp4"); + +// Create an output stream and write data. +OzoneOutputStream videoStream = video.createKey("intro.mp4", 1048576); +videoStream.write(videoData); + +// Close the stream when it is done. + videoStream.close(); + + +// We can use the same bucket to read the file that we just wrote, by creating an input Stream. +// Let us allocate a byte array to hold the video first. +byte[] data = new byte[(int)1048576]; +OzoneInputStream introStream = video.readKey("intro.mp4"); +// read intro.mp4 into the data buffer +introStream.read(data); +introStream.close(); +{{< /highlight >}} + + +Here is a complete example of the code that we just wrote. Please note the close functions being called in this program. + +{{< highlight java >}} +OzoneClient ozClient = OzoneClientFactory.getClient(); +ObjectStore objectStore = ozClient.getObjectStore(); +// Let us create a volume to store our game assets. +// This uses default arguments for creating that volume. +objectStore.createVolume("assets"); + +// Let us verify that the volume got created. +OzoneVolume assets = objectStore.getVolume("assets"); + +// Let us create a bucket called videos. +assets.createBucket("videos"); +OzoneBucket video = assets.getBucket("videos"); + +// read data from the file, this is assumed to be a user provided function. +byte [] videoData = readFile("intro.mp4"); + +// Create an output stream and write data. +OzoneOutputStream videoStream = video.createKey("intro.mp4", 1048576); +videoStream.write(videoData); + +// Close the stream when it is done. + videoStream.close(); + + +// We can use the same bucket to read the file that we just wrote, by creating an input Stream.
+// Let us allocate a byte array to hold the video first. + +byte[] data = new byte[(int)1048576]; +OzoneInputStream introStream = video.readKey("intro.mp4"); +introStream.read(data); + +// Close the stream when it is done. +introStream.close(); + +// Close the client. +ozClient.close(); +{{< /highlight >}} diff --git a/hadoop-ozone/docs/content/KeyCommands.md b/hadoop-ozone/docs/content/KeyCommands.md new file mode 100644 index 0000000000..241310aed9 --- /dev/null +++ b/hadoop-ozone/docs/content/KeyCommands.md @@ -0,0 +1,111 @@ +--- +title: Key Commands +menu: + main: + parent: Client + weight: 3 +--- + +Ozone shell supports the following key commands. + + * [get](#get) + * [put](#put) + * [delete](#delete) + * [info](#info) + * [list](#list) + + +### Get + +The key get command downloads a key from the Ozone cluster to the local file system. + +***Params:*** + +| Arguments | Comment | +|--------------------------------|-----------------------------------------| +| Uri | The name of the key in **/volume/bucket/key** format. +| FileName | Local file to download the key to. + + +{{< highlight bash >}} +ozone sh key get /hive/jan/sales.orc sales.orc +{{< /highlight >}} +Downloads the file sales.orc from the _/hive/jan_ bucket and writes it to the +local file sales.orc. + +### Put + +Uploads a file from the local file system to the specified bucket. + +***Params:*** + + +| Arguments | Comment | +|--------------------------------|-----------------------------------------| +| Uri | The name of the key in **/volume/bucket/key** format. +| FileName | Local file to upload. +| -r, --replication | Optional, Number of copies, ONE or THREE are the options. Picks up the default from cluster configuration. + +{{< highlight bash >}} +ozone sh key put /hive/jan/corrected-sales.orc sales.orc +{{< /highlight >}} +The above command will put the sales.orc as a new key into _/hive/jan/corrected-sales.orc_. + +### Delete + +The delete key command removes the key from the bucket.
+ +***Params:*** + +| Arguments | Comment | +|--------------------------------|-----------------------------------------| +| Uri | The name of the key. + +{{< highlight bash >}} +ozone sh key delete /hive/jan/corrected-sales.orc +{{< /highlight >}} + +The above command deletes the key _/hive/jan/corrected-sales.orc_. + + +### Info + +The key info command returns the information about the key. +***Params:*** + +| Arguments | Comment | +|--------------------------------|-----------------------------------------| +| Uri | The name of the key. + +{{< highlight bash >}} +ozone sh key info /hive/jan/sales.orc +{{< /highlight >}} + +The above command will print out the information about _/hive/jan/sales.orc_ +key. + +### List + +The key list command allows the user to list all keys in a bucket. + +***Params:*** + +| Arguments | Comment | +|--------------------------------|-----------------------------------------| +| -l, --length | Maximum number of results to return. Default: 1000 +| -p, --prefix | Optional, Only keys that match this prefix will be returned. +| -s, --start | The listing will start from the key after the start key. +| Uri | The name of the _bucket_. + +{{< highlight bash >}} +ozone sh key list /hive/jan +{{< /highlight >}} + +This command will list all keys in the bucket _/hive/jan_. + + + + + +You can try out these commands from the docker instance of the [Alpha +Cluster](runningviadocker.html). diff --git a/hadoop-ozone/docs/content/Metrics.md b/hadoop-ozone/docs/content/Metrics.md deleted file mode 100644 index 64a481fa8c..0000000000 --- a/hadoop-ozone/docs/content/Metrics.md +++ /dev/null @@ -1,170 +0,0 @@ ---- -title: Metrics -menu: main ---- - - - - -HDFS Ozone Metrics -=============== - - - -Overview --------- - -The container metrics that is used in HDFS Ozone. - -### Storage Container Metrics - -The metrics for various storage container operations in HDFS Ozone.
- -Storage container is an optional service that can be enabled by setting -'ozone.enabled' to true. -These metrics are only available when ozone is enabled. - -Storage Container Metrics maintains a set of generic metrics for all -container RPC calls that can be made to a datandoe/container. - -Along with the total number of RPC calls containers maintain a set of metrics -for each RPC call. Following is the set of counters maintained for each RPC -operation. - -*Total number of operation* - We maintain an array which counts how -many times a specific operation has been performed. -Eg.`NumCreateContainer` tells us how many times create container has been -invoked on this datanode. - -*Total number of pending operation* - This is an array which counts how -many times a specific operation is waitting to be processed from the client -point of view. -Eg.`NumPendingCreateContainer` tells us how many create container requests that -waitting to be processed. - -*Average latency of each pending operation in nanoseconds* - The average latency -of the operation from the client point of view. -Eg. `CreateContainerLatencyAvgTime` - This tells us the average latency of -Create Container from the client point of view. - -*Number of bytes involved in a specific command* - This is an array that is -maintained for all operations, but makes sense only for read and write -operations. - -While it is possible to read the bytes in update container, it really makes -no sense, since no data stream involved. Users are advised to use this -metric only when it makes sense. Eg. `BytesReadChunk` -- Tells us how -many bytes have been read from this data using Read Chunk operation. - -*Average Latency of each operation* - The average latency of the operation. -Eg. `LatencyCreateContainerAvgTime` - This tells us the average latency of -Create Container. - -*Quantiles for each of these operations* - The 50/75/90/95/99th percentile -of these operations. Eg. 
`CreateContainerNanos60s50thPercentileLatency` -- -gives latency of the create container operations at the 50th percentile latency -(1 minute granularity). We report 50th, 75th, 90th, 95th and 99th percentile -for all RPCs. - -So this leads to the containers reporting these counters for each of these -RPC operations. - -| Name | Description | -|:---- |:---- | -| `NumOps` | Total number of container operations | -| `CreateContainer` | Create container operation | -| `ReadContainer` | Read container operation | -| `UpdateContainer` | Update container operations | -| `DeleteContainer` | Delete container operations | -| `ListContainer` | List container operations | -| `PutKey` | Put key operations | -| `GetKey` | Get key operations | -| `DeleteKey` | Delete key operations | -| `ListKey` | List key operations | -| `ReadChunk` | Read chunk operations | -| `DeleteChunk` | Delete chunk operations | -| `WriteChunk` | Write chunk operations| -| `ListChunk` | List chunk operations | -| `CompactChunk` | Compact chunk operations | -| `PutSmallFile` | Put small file operations | -| `GetSmallFile` | Get small file operations | -| `CloseContainer` | Close container operations | - -### Storage Container Manager Metrics - -The metrics for containers that managed by Storage Container Manager. - -Storage Container Manager (SCM) is a master service which keeps track of -replicas of storage containers. It also manages all data nodes and their -states, dealing with container reports and dispatching commands for execution. 
- -Following are the counters for containers: - -| Name | Description | -|:---- |:---- | -| `LastContainerReportSize` | Total size in bytes of all containers in latest container report that SCM received from datanode | -| `LastContainerReportUsed` | Total number of bytes used by all containers in latest container report that SCM received from datanode | -| `LastContainerReportKeyCount` | Total number of keys in all containers in latest container report that SCM received from datanode | -| `LastContainerReportReadBytes` | Total number of bytes have been read from all containers in latest container report that SCM received from datanode | -| `LastContainerReportWriteBytes` | Total number of bytes have been written into all containers in latest container report that SCM received from datanode | -| `LastContainerReportReadCount` | Total number of times containers have been read from in latest container report that SCM received from datanode | -| `LastContainerReportWriteCount` | Total number of times containers have been written to in latest container report that SCM received from datanode | -| `ContainerReportSize` | Total size in bytes of all containers over whole cluster | -| `ContainerReportUsed` | Total number of bytes used by all containers over whole cluster | -| `ContainerReportKeyCount` | Total number of keys in all containers over whole cluster | -| `ContainerReportReadBytes` | Total number of bytes have been read from all containers over whole cluster | -| `ContainerReportWriteBytes` | Total number of bytes have been written into all containers over whole cluster | -| `ContainerReportReadCount` | Total number of times containers have been read from over whole cluster | -| `ContainerReportWriteCount` | Total number of times containers have been written to over whole cluster | - -### Key Space Metrics - -The metrics for various Ozone Manager operations in HDFS Ozone. - -The Ozone Manager (OM) is a service that similar to the Namenode in HDFS. 
-In the current design of OM, it maintains metadata of all volumes, buckets and keys. -These metrics are only available when ozone is enabled. - -Following is the set of counters maintained for each key space operation. - -*Total number of operation* - We maintain an array which counts how -many times a specific operation has been performed. -Eg.`NumVolumeCreate` tells us how many times create volume has been -invoked in OM. - -*Total number of failed operation* - This type operation is opposite to the above -operation. -Eg.`NumVolumeCreateFails` tells us how many times create volume has been invoked -failed in OM. - -Following are the counters for each of key space operations. - -| Name | Description | -|:---- |:---- | -| `VolumeCreate` | Create volume operation | -| `VolumeUpdates` | Update volume property operation | -| `VolumeInfos` | Get volume information operation | -| `VolumeCheckAccesses` | Check volume access operation | -| `VolumeDeletes` | Delete volume operation | -| `VolumeLists` | List volume operation | -| `BucketCreates` | Create bucket operation | -| `BucketInfos` | Get bucket information operation | -| `BucketUpdates` | Update bucket property operation | -| `BucketDeletes` | Delete bucket operation | -| `BucketLists` | List bucket operation | -| `KeyAllocate` | Allocate key operation | -| `KeyLookup` | Look up key operation | -| `KeyDeletes` | Delete key operation | -| `KeyLists` | List key operation | diff --git a/hadoop-ozone/docs/content/OzoneFS.md b/hadoop-ozone/docs/content/OzoneFS.md new file mode 100644 index 0000000000..f78416f506 --- /dev/null +++ b/hadoop-ozone/docs/content/OzoneFS.md @@ -0,0 +1,64 @@ +--- +title: Ozone File System +date: 2017-09-14 +menu: main +menu: + main: + parent: Client +--- + +There are many Hadoop compatible file systems under Hadoop. Hadoop compatible file systems ensure that storage backends like Ozone can easily be integrated into the Hadoop ecosystem.
+ +## Setting up the Ozone file system + +To create an ozone file system, we have to choose a bucket where the file system would live. This bucket will be used as the backend store for OzoneFileSystem. All the files and directories will be stored as keys in this bucket. + +Please run the following commands to create a volume and bucket, if you don't have them already. + +{{< highlight bash >}} +ozone sh volume create /volume +ozone sh bucket create /volume/bucket +{{< /highlight >}} + +Once this is created, please make sure that bucket exists via the listVolume or listBucket commands. + +Please add the following entry to the core-site.xml. + +{{< highlight xml >}} +<property> + <name>fs.o3.impl</name> + <value>org.apache.hadoop.fs.ozone.OzoneFileSystem</value> +</property> +<property> + <name>fs.default.name</name> + <value>o3://localhost:9864/volume/bucket</value> +</property> +{{< /highlight >}} + +This will make this bucket the default file system for HDFS dfs commands and register the o3 file system type. + +You also need to add the ozone-filesystem.jar file to the classpath: + +{{< highlight bash >}} +export HADOOP_CLASSPATH=/opt/ozone/share/hadoop/ozonefs/hadoop-ozone-filesystem.jar +{{< /highlight >}} + + + + +Once the default Filesystem has been set up, users can run commands like ls, put, mkdir, etc. +For example, + +{{< highlight bash >}} +hdfs dfs -ls / +{{< /highlight >}} + +or + +{{< highlight bash >}} +hdfs dfs -mkdir /users +{{< /highlight >}} + + +Or put command etc. In other words, all programs like Hive, Spark, and Distcp will work against this file system. +Please note that any keys created/deleted in the bucket using methods apart from OzoneFileSystem will show up as directories and files in the Ozone File System.
diff --git a/hadoop-ozone/docs/content/OzoneManager.md b/hadoop-ozone/docs/content/OzoneManager.md new file mode 100644 index 0000000000..87edf33921 --- /dev/null +++ b/hadoop-ozone/docs/content/OzoneManager.md @@ -0,0 +1,61 @@ +--- +title: "Ozone Manager" +date: "2017-09-14" +menu: + main: + parent: Architecture +weight: 11 +--- + +OM Overview +------------- + +Ozone Manager or OM is the namespace manager for Ozone. The clients (RPC clients, Rest proxy, Ozone file system, etc.) communicate with OM to create and delete various ozone objects. + +Each ozone volume is the root of a namespace under OM. This is very different from HDFS which provides a single rooted file system. + +Ozone's namespace is a collection of volumes or is a forest instead of a +single rooted tree as in HDFS. This property makes it easy to deploy multiple + OMs for scaling, this feature is under development. + +OM Metadata +----------------- + +Conceptually, OM maintains a list of volumes, buckets, and keys. For each user, it maintains a list of volumes. For each volume, the list of buckets and for each bucket the list of keys. + +Right now, OM is a single instance service. Ozone already relies on Apache Ratis (A Replicated State Machine based on Raft protocol). OM will be extended to replicate all its metadata via Ratis. With that, OM will be highly available. + +OM UI +------------ + +OM supports a simple UI for the time being. The default port of OM is 9874. To access the OM UI, the user can connect to http://OM:port or for a concrete example, +``` +http://omserver:9874/ +``` +OM UI primarily tries to measure load and latency of OM. The first section of OM UI relates to the number of operations seen by the cluster broken down by the object, operation and whether the operation was successful. + +The latter part of the UI is focused on latency and number of operations that OM is performing. + +One of the hardest problems in HDFS world is discovering the numerous settings offered to tune HDFS. 
Ozone solves that problem by tagging the configs. To discover settings, click on "Common Tools"->Config. This will take you to the ozone config UI. + +Config UI +------------ + +The ozone config UI is a matrix with rows representing the tags, and columns representing All, OM and SCM. + +Suppose a user wanted to discover the required settings for ozone. Then the user can tick the checkbox that says "Required." +This will show all "Required" settings along with the description of what each setting does. + +The user can combine different checkboxes and UI will combine the results. That is, if you have more than one row selected, then all keys for those chosen tags are displayed together. + +We are hopeful that this leads to a more straightforward way of discovering settings that manage ozone. + + +OM and SCM +------------------- +[Storage container manager]({{< ref "Hdds.md" >}}) or (SCM) is the block manager + for ozone. When a client requests OM for a set of data nodes to write data, OM talks to SCM and gets a block. + +A block returned by SCM contains a pipeline, which is a set of nodes that will participate in that block replication. + +So OM is dependent on SCM for reading and writing of Keys. However, OM is independent of SCM while doing metadata operations like ozone volume or bucket operations. diff --git a/hadoop-ozone/docs/content/RealCluster.md b/hadoop-ozone/docs/content/RealCluster.md new file mode 100644 index 0000000000..5d89aa109b --- /dev/null +++ b/hadoop-ozone/docs/content/RealCluster.md @@ -0,0 +1,71 @@ +--- +title: Starting an Ozone Cluster +weight: 1 +menu: + main: + parent: Starting + weight: 3 +--- + + +Before we boot up the Ozone cluster, we need to initialize both SCM and Ozone Manager. + +{{< highlight bash >}} +ozone scm -init +{{< /highlight >}} +This allows SCM to create the cluster Identity and initialize its state. +The ```init``` command is similar to Namenode format.
The init command is executed only once; it allows SCM to create all the required on-disk structures to work correctly. +{{< highlight bash >}} +ozone --daemon start scm +{{< /highlight >}} + +Once we know SCM is up and running, we can create an Object Store for our use. This is done by running the following command. + +{{< highlight bash >}} +ozone om -createObjectStore +{{< /highlight >}} + + +Once Ozone manager has created the Object Store, we are ready to run the name +services. + +{{< highlight bash >}} +ozone --daemon start om +{{< /highlight >}} + +At this point Ozone's name services, the Ozone manager, and the block service SCM are both running. +**Please note**: If SCM is not running +```createObjectStore``` command will fail. SCM start will fail if on-disk data structures are missing. So please make sure you have done both ```init``` and ```createObjectStore``` commands. + +Now we need to start the data nodes. Please run the following command on each datanode. +{{< highlight bash >}} +ozone --daemon start datanode +{{< /highlight >}} + +At this point SCM, Ozone Manager and data nodes are up and running. + +***Congratulations! You have set up a functional ozone cluster.*** + +------- +If you want to make your life simpler, you can just run +{{< highlight bash >}} +ozone scm -init +ozone om -createObjectStore +start-ozone.sh +{{< /highlight >}} +This assumes that you have set up the slaves file correctly and ssh +configuration that allows ssh-ing to all data nodes. This is the same as the +HDFS configuration, so please refer to HDFS documentation on how to set this +up. diff --git a/hadoop-ozone/docs/content/Rest.md b/hadoop-ozone/docs/content/Rest.md index 2e935d6d45..a4c405089b 100644 --- a/hadoop-ozone/docs/content/Rest.md +++ b/hadoop-ozone/docs/content/Rest.md @@ -1,6 +1,8 @@ --- -title: Ozone REST API -menu: main +title: REST API +menu: + main: + parent: Client --- -Ozone REST API's.
-=================== - - - -Overview --------- - The Ozone REST API's allows user to access ozone via REST protocol. -Authentication and Authorization --------------------- +## Authentication and Authorization For time being, The default authentication mode of REST API is insecure access mode, which is *Simple* mode. Under this mode, ozone server trusts the user @@ -47,8 +40,7 @@ authorized to obtain administrator privilege by using HTTP header for example set following header *Authorization: OZONE root* in the HTTP request, then ozone will authorize the client with administrator privilege. -Common REST Headers --------------------- +## Common REST Headers The following HTTP headers must be set for each REST call. @@ -58,8 +50,7 @@ The following HTTP headers must be set for each REST call. | Date | Standard HTTP header that represents dates. The format is - day of the week, month, day, year and time (military time format) in GMT. Any other time zone will be rejected by ozone server. Eg. *Date : Mon, Apr 4, 2016 06:22:00 GMT*. This field is required. | | x-ozone-version | A required HTTP header to indicate which version of API this call will be communicating to. E.g *x-ozone-version: v1*. Currently ozone only publishes v1 version API. | -Common Reply Headers --------------------- +## Common Reply Headers The common reply headers are part of all Ozone server replies. @@ -69,8 +60,7 @@ The common reply headers are part of all Ozone server replies. | x-ozone-request-id | This is a UUID string that represents an unique request ID. This ID is used to track the request through the ozone system and is useful for debugging purposes. | | x-ozone-server-name | Fully qualified domain name of the sever which handled the request. 
| -Volume APIs --------------------- +## Volume APIs ### Create a Volume @@ -222,8 +212,7 @@ this request gets all volumes owned by *bilbo* and each volume's name contains p ] } -Bucket APIs --------------------- +## Bucket APIs ### Create Bucket @@ -389,8 +378,7 @@ this request lists all the buckets under volume *volume-of-bilbo*, and the resul ] } -Key APIs ------------------- +## Key APIs ### Put Key diff --git a/hadoop-ozone/docs/content/RunningViaDocker.md b/hadoop-ozone/docs/content/RunningViaDocker.md new file mode 100644 index 0000000000..3ff3328c6f --- /dev/null +++ b/hadoop-ozone/docs/content/RunningViaDocker.md @@ -0,0 +1,70 @@ +--- +title: Alpha Cluster +weight: 1 +menu: + main: + parent: Starting + weight: 1 +--- + + + +***This is an alpha release of Ozone. Please don't use this release in +production.*** Please check the road map page for features under +development. + +The easiest way to run ozone is to download the release tarball and launch +ozone via Docker. Docker will create a small ozone cluster on your machine, +including the data nodes and ozone services. + +## Running Ozone via Docker + + +**This assumes that you have Docker installed on the machine.** + +* Download the Ozone tarball and untar it. + +* Go to the directory where the docker compose files exist and tell +`docker-compose` to start Ozone in the background. This will start a small +ozone instance on your machine. + +{{< highlight bash >}} +cd ozone-0.2.1-SNAPSHOT/compose/ozone/ + +docker-compose up -d +{{< /highlight >}} + + +To verify that ozone is working as expected, let us log into a data node and +run _freon_, the load generator for Ozone. The ```exec datanode bash``` command +will open a bash shell on the datanode. The ozone freon command is executed +within the datanode container. You can quit freon via CTRL-C any time. The +```rk``` profile instructs freon to generate random keys. 
+ +{{< highlight bash >}} +docker-compose exec datanode bash +ozone freon rk +{{< /highlight >}} + +You can check out the **OzoneManager UI** at http://localhost:9874/ to see the +activity generated by freon. +While you are there, please don't forget to check out the ozone configuration explorer. + +***Congratulations! You have just run your first ozone cluster.*** + +To shut down the cluster, please run +{{< highlight bash >}} +docker-compose down +{{< /highlight >}} \ No newline at end of file diff --git a/hadoop-ozone/docs/content/RunningWithHDFS.md b/hadoop-ozone/docs/content/RunningWithHDFS.md new file mode 100644 index 0000000000..0216fcb3f6 --- /dev/null +++ b/hadoop-ozone/docs/content/RunningWithHDFS.md @@ -0,0 +1,61 @@ +--- +title: Running concurrently with HDFS +weight: 1 +menu: + main: + parent: Starting + weight: 4 +--- + +Ozone is designed to work with HDFS. So it is easy to deploy ozone in an +existing HDFS cluster. + +Ozone does *not* support security today. It is a work in progress and tracked + in +[HDDS-4](https://issues.apache.org/jira/browse/HDDS-4). If you enable ozone +in a secure HDFS cluster, for your own protection Ozone will refuse to work. + +In other words, till Ozone security work is done, Ozone will not work in any +secure clusters. + +The container manager part of Ozone runs inside DataNodes as a pluggable module. +To activate ozone you should define the service plugin implementation class. + + + +{{< highlight xml >}} +<property> + <name>dfs.datanode.plugins</name> + <value>org.apache.hadoop.ozone.HddsDatanodeService</value> +</property> +{{< /highlight >}} + +You also need to add the ozone-datanode-plugin jar file to the classpath: + +{{< highlight bash >}} +export HADOOP_CLASSPATH=/opt/ozone/share/hadoop/ozoneplugin/hadoop-ozone-datanode-plugin.jar +{{< /highlight >}} + + + +To start ozone with HDFS you should start the following components: + + 1. HDFS Namenode (from Hadoop distribution) + 2. 
HDFS Datanode (from the Hadoop distribution with the plugin on the + classpath from the Ozone distribution) + 3. Ozone Manager (from the Ozone distribution) + 4. Storage Container Manager (from the Ozone distribution) + +Please check the datanode log to see whether the HDDS/Ozone plugin has started or +not. The datanode log should contain something like this: + +``` +2018-09-17 16:19:24 INFO HddsDatanodeService:158 - Started plug-in org.apache.hadoop.ozone.web.OzoneHddsDatanodeService@6f94fb9d +``` + + \ No newline at end of file diff --git a/hadoop-ozone/docs/content/SCMCLI.md b/hadoop-ozone/docs/content/SCMCLI.md new file mode 100644 index 0000000000..5953988f37 --- /dev/null +++ b/hadoop-ozone/docs/content/SCMCLI.md @@ -0,0 +1,26 @@ +--- +title: "SCMCLI" +date: 2017-08-10 +menu: + main: + parent: Tools +--- + + +SCM is the block service for Ozone. It is also the workhorse for ozone. But user processes never talk to SCM. However, being able to read the state of SCM is useful. + +SCMCLI allows the developer to access SCM directly. Please note: Improper usage of this tool can destroy your cluster. Unless you know exactly what you are doing, please do *not* use this tool. In other words, this is a developer-only tool. We might even remove this command in the future to prevent improper use. + +[^1]: This assumes that you have a working docker installation on the development machine. diff --git a/hadoop-ozone/docs/content/Settings.md b/hadoop-ozone/docs/content/Settings.md new file mode 100644 index 0000000000..bfa644e06c --- /dev/null +++ b/hadoop-ozone/docs/content/Settings.md @@ -0,0 +1,139 @@ +--- +title: Configuration +weight: 1 +menu: + main: + parent: Starting + weight: 2 +--- + + + + + +If you are feeling adventurous, you can set up ozone in a real cluster. +Setting up a real cluster requires us to understand the components of Ozone. +Ozone is designed to work concurrently with HDFS. However, Ozone is also +capable of running independently. 
The components of ozone are the same in both approaches. + +## Ozone Components + +1. Ozone Manager - Is the server that is in charge of the namespace of Ozone. Ozone Manager is responsible for all volume, bucket and key operations. +2. Storage Container Manager - Acts as the block manager. Ozone Manager +requests blocks from SCM, to which clients can write data. +3. Datanodes - Ozone data node code runs inside the HDFS datanode or in the independent deployment case runs an ozone datanode daemon. + + + + +## Setting up an Ozone only cluster + +* Please untar the ozone-0.2.1-SNAPSHOT to the directory where you are going +to run Ozone from. We need Ozone jars on all machines in the cluster. So you +need to do this on all machines in the cluster. + +* Ozone relies on a configuration file called ```ozone-site.xml```. To +generate a template that you can replace with proper values, please run the +following command. This will generate a template called ```ozone-site.xml``` at +the specified path (directory). + +{{< highlight bash >}} +ozone genconf -output <path> +{{< /highlight >}} + +Let us look at the settings inside the generated file (ozone-site.xml) and +how they control ozone. Once the right values are defined, this file +needs to be copied to ```ozone directory/etc/Hadoop```. + + +* **ozone.enabled** This is the most critical setting for ozone. +Ozone is a work in progress and users have to enable this service explicitly. +By default, Ozone is disabled. Setting this flag to `true` enables ozone in the +HDFS or Ozone cluster. + +Here is an example, + +{{< highlight xml >}} +<property> + <name>ozone.enabled</name> + <value>True</value> +</property> +{{< /highlight >}} + +* **ozone.metadata.dirs** Allows Administrators to specify where the + metadata must reside. Usually you pick your fastest disk (SSD if + you have them on your nodes). OzoneManager, SCM and datanode will write the + metadata to this path. This is a required setting; if it is missing, Ozone + will fail to come up. 
+ + Here is an example, + +{{< highlight xml >}} +<property> + <name>ozone.metadata.dirs</name> + <value>/data/disk1/meta</value> +</property> +{{< /highlight >}} + +* **ozone.scm.names** Storage Container Manager (SCM) is a distributed block + service which is used by ozone. This property allows data nodes to discover + SCM's address. Data nodes send heartbeats to SCM. + Until the HA feature is complete, we configure ozone.scm.names to be a + single machine. + + Here is an example, + + {{< highlight xml >}} + <property> + <name>ozone.scm.names</name> + <value>scm.hadoop.apache.org</value> + </property> + {{< /highlight >}} + + * **ozone.scm.datanode.id** Data nodes generate a unique ID called Datanode + ID. This identity is written to the file specified by this path. *Data nodes + will create this path if it doesn't exist already.* + +Here is an example, +{{< highlight xml >}} +<property> + <name>ozone.scm.datanode.id</name> + <value>/data/disk1/meta/node/datanode.id</value> +</property> +{{< /highlight >}} + +* **ozone.om.address** OM server address. This is used by OzoneClient and +Ozone File System. + +Here is an example, +{{< highlight xml >}} +<property> + <name>ozone.om.address</name> + <value>ozonemanager.hadoop.apache.org</value> +</property> +{{< /highlight >}} + + +### Ozone Settings Summary + +| Setting | Value | Comment | +|--------------------------------|------------------------------|------------------------------------------------------------------| +| ozone.enabled | true | This enables SCM and containers in HDFS cluster. | +| ozone.metadata.dirs | file path | The metadata will be stored here. | +| ozone.scm.names | SCM server name | Hostname:port or IP:port address of SCM. 
| +| ozone.scm.block.client.address | SCM server name and port | Used by services like OM | +| ozone.scm.client.address | SCM server name and port | Used by client-side | +| ozone.scm.datanode.address | SCM server name and port | Used by datanode to talk to SCM | +| ozone.om.address | OM server name | Used by Ozone handler and Ozone file system. 
| diff --git a/hadoop-ozone/docs/content/VolumeCommands.md b/hadoop-ozone/docs/content/VolumeCommands.md new file mode 100644 index 0000000000..45811c59bf --- /dev/null +++ b/hadoop-ozone/docs/content/VolumeCommands.md @@ -0,0 +1,100 @@ +--- +title: Volume Commands +menu: + main: + parent: Client + weight: 2 +--- + +Volume commands generally need administrator privileges. The ozone shell supports the following volume commands. + + * [create](#create) + * [delete](#delete) + * [info](#info) + * [list](#list) + * [update](#update) + +### Create + +The volume create command allows an administrator to create a volume and +assign it to a user. + +***Params:*** + +| Arguments | Comment | +|--------------------------------|-----------------------------------------| +| -q, --quota | Optional. This argument specifies the maximum size this volume can use in the Ozone cluster. | +| -u, --user | Required. The name of the user who owns this volume. This user can create buckets and keys on this volume. | +| Uri | The name of the volume. | + +{{< highlight bash >}} +ozone sh volume create --quota=1TB --user=bilbo /hive +{{< /highlight >}} + +The above command will create a volume called _hive_ on the ozone cluster. This +volume has a quota of 1TB, and the owner is _bilbo_. + +### Delete + +The volume delete command allows an administrator to delete a volume. If the +volume is not empty then this command will fail. + +***Params:*** + +| Arguments | Comment | +|--------------------------------|-----------------------------------------| +| Uri | The name of the volume. | + +{{< highlight bash >}} +ozone sh volume delete /hive +{{< /highlight >}} + +The above command will delete the volume hive, if the volume has no buckets +inside it. + +### Info + +The volume info command returns the information about the volume including +quota and owner information. 
+***Params:*** + +| Arguments | Comment | +|--------------------------------|-----------------------------------------| +| Uri | The name of the volume. | + +{{< highlight bash >}} +ozone sh volume info /hive +{{< /highlight >}} + +The above command will print out the information about the hive volume. + +### List + +The volume list command will list the volumes owned by a user. + +{{< highlight bash >}} +ozone sh volume list --user hadoop +{{< /highlight >}} + +The above command will print out all the volumes owned by the user hadoop. + +### Update + +The volume update command allows changing the owner and quota on a given volume. + +***Params:*** + +| Arguments | Comment | +|--------------------------------|-----------------------------------------| +| -q, --quota | Optional. This argument specifies the maximum size this volume can use in the Ozone cluster. | +| -u, --user | Optional. The name of the user who owns this volume. This user can create buckets and keys on this volume. | +| Uri | The name of the volume. | + +{{< highlight bash >}} +ozone sh volume update --quota=10TB /hive +{{< /highlight >}} + +The above command updates the volume quota to 10TB. + +You can try out these commands from the docker instance of the [Alpha +Cluster](runningviadocker.html). diff --git a/hadoop-ozone/docs/content/_index.md b/hadoop-ozone/docs/content/_index.md index 383b2e0ef0..19340bfb41 100644 --- a/hadoop-ozone/docs/content/_index.md +++ b/hadoop-ozone/docs/content/_index.md @@ -17,86 +17,20 @@ weight: -10 limitations under the License. See accompanying LICENSE file. --> -
Ozone is an Object store for Apache Hadoop. It aims to scale to billions of -keys. 
The following is a high-level overview of the core components of Ozone.

 +# Apache Hadoop Ozone -![Ozone Architecture Overview](./OzoneOverview.png) 

 +Ozone is a scalable, distributed object store for Hadoop. Applications like +Apache Spark, Hive and YARN can run against Ozone without any +modifications. Ozone comes with a [Java client library]({{< ref "JavaApi.md" +>}}) and a [command line interface]({{< ref "CommandShell.md#shell" >}}) which make it easy to use Ozone. This client library supports both RPC and REST protocols. -The main elements of Ozone are
: +Ozone consists of volumes, buckets, and keys. -## Clients +* Volumes are similar to user accounts. Only administrators can create or delete volumes. +* Buckets are similar to directories. A bucket can contain any number of keys, but buckets cannot contain other buckets. +* Keys are similar to files. A bucket can contain any number of keys. -Ozone ships with a set of ready-made clients. They are 
Ozone CLI and Freon.
 - * [Ozone CLI](./OzoneCommandShell.html) is the command line interface like 'hdfs' command.
 - * Freon is a load generation tool for Ozone.
 +}}"> -## REST Handler - -Ozone provides both an RPC (Remote Procedure Call) as well as a REST -(Representational State Transfer) style interface. This allows clients to be -written in many languages quickly. Ozone strives to maintain a similar -interface between REST and RPC. The Rest handler offers the REST protocol -services of Ozone. - -For most purposes, a client can make one line change to switch from REST to -RPC or vice versa. 
 - -## Ozone File System - -Ozone file system (TODO: Add documentation) is a Hadoop compatible file system. -This is the important user-visible component of ozone. -This allows Hadoop services and applications like Hive/Spark to run against -Ozone without any change. - -## Ozone Client - -This is like DFSClient in HDFS. This acts as the standard client to talk to -Ozone. All other components that we have discussed so far rely on Ozone client -(TODO: Add Ozone client documentation).
 - -## Ozone Manager - -Ozone Manager (OM) takes care of the Ozone's namespace. -All ozone entities like volumes, buckets and keys are managed by OM -(TODO: Add OM documentation). In short, OM is the metadata manager for Ozone. -OM talks to blockManager(SCM) to get blocks and passes it on to the Ozone -client. Ozone client writes data to these blocks. -OM will eventually be replicated via Apache Ratis for High Availability.
 - -## Storage Container Manager -Storage Container Manager (SCM) is the block and cluster manager for Ozone. -SCM along with data nodes offer a service called 'containers'. -A container is a group unrelated of blocks that are managed together -as a single entity. - -SCM offers the following abstractions.

 - -![SCM Abstractions](../SCMBlockDiagram.png) - -### Blocks - -Blocks are like blocks in HDFS. They are replicated store of data. - -### Containers - -A collection of blocks replicated and managed together. - -### Pipelines - -SCM allows each container to choose its method of replication. -For example, a container might decide that it needs only one copy of a block -and might choose a stand-alone pipeline. Another container might want to have -a very high level of reliability and pick a RATIS based pipeline. In other -words, SCM allows different kinds of replication strategies to co-exist. - -### Pools - -A group of data nodes is called a pool. For scaling purposes, -we define a pool as a set of machines. This makes management of datanodes -easier. - -### Nodes - -The data node where data is stored. diff --git a/hadoop-ozone/docs/pom.xml b/hadoop-ozone/docs/pom.xml index 92680ad775..d8edd15e99 100644 --- a/hadoop-ozone/docs/pom.xml +++ b/hadoop-ozone/docs/pom.xml @@ -47,10 +47,6 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> dev-support/bin/generate-site.sh - - -b - /docs - diff --git a/hadoop-ozone/docs/themes/ozonedoc/layouts/_default/single.html b/hadoop-ozone/docs/themes/ozonedoc/layouts/_default/single.html index 0fdd1ba4a7..1970f73e10 100644 --- a/hadoop-ozone/docs/themes/ozonedoc/layouts/_default/single.html +++ b/hadoop-ozone/docs/themes/ozonedoc/layouts/_default/single.html @@ -21,7 +21,10 @@
{{ partial "sidebar.html" . }}
+

{{ .Title }}

+
{{ .Content }} +
diff --git a/hadoop-ozone/docs/themes/ozonedoc/layouts/partials/header.html b/hadoop-ozone/docs/themes/ozonedoc/layouts/partials/header.html index c1f47a95fa..220db57034 100644 --- a/hadoop-ozone/docs/themes/ozonedoc/layouts/partials/header.html +++ b/hadoop-ozone/docs/themes/ozonedoc/layouts/partials/header.html @@ -23,9 +23,9 @@ Documentation for Apache Hadoop Ozone - + - + diff --git a/hadoop-ozone/docs/themes/ozonedoc/layouts/partials/sidebar.html b/hadoop-ozone/docs/themes/ozonedoc/layouts/partials/sidebar.html index b043911c4c..36bed1ae1c 100644 --- a/hadoop-ozone/docs/themes/ozonedoc/layouts/partials/sidebar.html +++ b/hadoop-ozone/docs/themes/ozonedoc/layouts/partials/sidebar.html @@ -12,6 +12,7 @@ limitations under the License. See accompanying LICENSE file. -->